##############################################################################################################
# Load Packages                                                                                              #
##############################################################################################################
library(data.table)
setDTthreads(0)
library(readstata13)
library(sas7bdat)
library(bit64)
library(lfe)
library(pbapply)
library(stargazer)
library(ggplot2)
library(tikzDevice)
library(gmodels)
library(survival)
library(readxl)
library(fabricatr)
library(haven)
library(car)
# Clear the workspace but keep every object whose name matches "port.data"
# (a plain rm(list = ls()) would drop those as well).
rm(list = setdiff(ls(), ls(pattern = "port.data")))
##############################################################################################################
# Platform-specific variables                                                                                #
##############################################################################################################
# Absolute locations of the input data on the author's machine. Every value
# ends in "/" because the code below appends file names with paste0().
# Edit these when running the script on a different machine.
trade.data.dir     <- "C:/Users/omartian/Box/GO/ManufacturingDecline/Data/USImportsAndExports/"
asm.data.dir       <- "C:/Users/omartian/Box/GO/ManufacturingDecline/Data/ASM/"
compustat.data.dir <- "C:/Users/omartian/Box/GO/ManufacturingDecline/Data/Compustat/"
PS.data.dir        <- "C:/Users/omartian/Box/GO/ManufacturingDecline/Data/Pierce Schott - AER 2013/data_files_aer_2013-1578/"
sox.event.dir      <- "C:/Users/omartian/Box/GO/ManufacturingDecline/Data/SOX Event Study/"
UK.dir             <- "C:/Users/omartian/Box/GO/ManufacturingDecline/Data/UK falsification/"
concentration.dir  <- "C:/Users/omartian/Box/GO/ManufacturingDecline/Data/Concentration/"
processed.logfile.dir <- "C:/Users/omartian/Box/Edgar/Logfiles/Processed/"


##############################################################################################################
# Utility Functions                                                                                          #
##############################################################################################################
# Winsorize a vector: values below the `fraction` quantile are raised to that
# quantile and values above the `1 - fraction` quantile are lowered to it.
# NA elements of `x` are ignored when the cut-offs are computed and are
# returned unchanged.
#
# Args:
#   x:        numeric vector to winsorize.
#   fraction: single number in [0, 0.5]; share clipped in each tail
#             (default 1%).
# Returns: `x` with both tails clipped to the quantile cut-offs.
# Raises:  an error when `fraction` is not a single, non-missing number
#          in [0, 0.5].
winsor <- function(x, fraction = .01) {
    # Reject NA / non-numeric input here; otherwise the range test below
    # fails with an unrelated "missing value where TRUE/FALSE needed" error.
    if (length(fraction) != 1 || !is.numeric(fraction) || is.na(fraction) ||
        fraction < 0 || fraction > 0.5) {
        stop("bad value for 'fraction'")
    }
    lim <- quantile(x, probs = c(fraction, 1 - fraction), na.rm = TRUE)
    x[x < lim[1]] <- lim[1]
    x[x > lim[2]] <- lim[2]
    x
}
# Inverse hyperbolic sine: asinh(x) = log(x + sqrt(x^2 + 1)).
#
# Uses base R's asinh() rather than the explicit formula. The explicit form
# loses all precision for large negative x (x + sqrt(x^2 + 1) cancels to 0,
# giving log(0) = -Inf) and overflows x^2 for very large |x|.
#
# Args:
#   x: numeric vector (integer64 input is converted to double first).
# Returns: asinh(x), element-wise.
ihs <- function(x) {
    # asinh() has no integer64 method; convert explicitly so bit64 values
    # are not misread.
    if (inherits(x, "integer64")) {
        x <- as.numeric(x)
    }
    asinh(x)
}

##############################################################################################################
# Process Import Data                                                                                        #
##############################################################################################################
# Read one year of Census import detail and collapse it to annual totals.
#   yr: suffix used in the file name "imp_detl_yearly_<yr>n.dta"
# Only the month == 12 rows are kept (presumably because the *_yr columns are
# year-to-date totals -- confirm), and the value columns are summed for each
# year / NAICS / exporting country / rate provision combination.
# Returns (invisibly) the aggregated data.table, keyed by those four columns.
load.imports <- function(yr) {
    import.file <- paste0(trade.data.dir, "imp_detl_yearly_", yr, "n.dta")
    raw.imports <- data.table(read.dta13(import.file))
    december.rows <- raw.imports[month == 12]
    imports.agg <- december.rows[, .(consumption.value = sum(con_val_yr),
                                     duty              = sum(cal_dut_yr),
                                     dutiable.value    = sum(dut_val_yr)),
                                 keyby = .(year, naics, cty_code, rate_prov)]
    invisible(imports.agg)
}

# Stack the yearly aggregates for file suffixes 89 through 117 (1989 - 2017)
import.data <- rbindlist(pblapply(89:117, load.imports), use.names = TRUE)

# Cross-reference of Census country codes, country names and ISO codes.
# Fixed-width file: negative widths are columns that are skipped.
countries <- read.fwf(paste0(trade.data.dir, "country.txt"),
                      widths     = c(4, -10, 52, -2, 2),
                      col.names  = c("census.code", "name", "iso.code"),
                      colClasses = c("numeric", "character", "character"),
                      skip       = 5)
countries <- data.table(countries)

# Attach the ISO 2-character code of the exporting country. This is an inner
# join, so import rows whose country code is not in the cross-reference drop out.
import.data <- merge(import.data,
                     countries[, .(census.code, iso.code)],
                     by.x = "cty_code",
                     by.y = "census.code")

# The loader and the cross-reference are no longer needed
rm(load.imports, countries)


##############################################################################################################
# Process ASM/CMF Data                                                                                       #
##############################################################################################################
# List of ASM files, one row of names per survey year (the two digits after
# "am" are the year: 16 = 2016 ... 03 = 2003). No am12* or am07* files are
# listed; those years are read further below from ec1231sg1.dat and
# ec0731sg2.dat instead. This list is only used to build the variable-name
# lookup table (asm.variable.names.w) and is removed afterwards.
asm.files <- c("am1631as101.dat", "am1631gs101.dat", "am1631vs101.dat",
               "am1531as101.dat", "am1531gs101.dat", "am1531vs101.dat",
               "am1431as101.dat", "am1431gs101.dat", "am1431vs101.dat",
               "am1331as101.dat", "am1331gs101.dat", "am1331vs101.dat",
               "am1131as101.dat", "am1131as201.dat", "am1131gs101.dat", "am1131gs201.dat", "am1131vs101.dat", "am1131vs201.dat",
               "am1031as101.dat", "am1031gs101.dat", "am1031vs101.dat",
               "am0931as101.dat", "am0931gs101.dat", "am0931vs101.dat",
               "am0831as101.dat", "am0831gs101.dat", "am0831vs101.dat",
               "am0631as101.dat", "am0631gs101.dat", "am0631vs101.dat",
               "am0531as101.dat", "am0531as102.dat", "am0531as103.dat", "am0531gs101.dat", "am0531gs102.dat", "am0531gs103.dat", "am0531gs104.dat", "am0531gs105.dat", "am0531gs106.dat", "am0531gs107.dat", "am0531gs108.dat", "am0531vs101.dat",
               "am0431as101.dat", "am0431as102.dat", "am0431as103.dat", "am0431gs101.dat", "am0431gs102.dat", "am0431gs103.dat", "am0431gs104.dat", "am0431gs105.dat", "am0431gs106.dat", "am0431gs107.dat", "am0431vs101.dat", "am0431vs102.dat",
               "am0331as101.dat", "am0331as102.dat", "am0331as103.dat", "am0331gs101.dat", "am0331gs102.dat", "am0331gs103.dat", "am0331gs104.dat", "am0331gs105.dat", "am0331gs106.dat", "am0331gs107.dat", "am0331vs101.dat"
)

# Read one file from asm.data.dir into a data.table and tag every row with
# the name of the file it came from.
#   x: file name relative to asm.data.dir
# Returns the data.table with an extra column `fn` equal to `x`.
read.asm.file <- function(x) {
    asm.path <- paste0(asm.data.dir, x)
    # fill=TRUE pads rows that have fewer fields than the widest row
    asm.table <- fread(asm.path, fill = TRUE)
    asm.table[, fn := x]
    return(asm.table)
}

# Read every listed ASM file; the result is a list with one data table per file
asm.data <- lapply(asm.files, read.asm.file)


# Helper: one-column data table ("col") holding the variable names of a table
asm.col.names <- function(dt) {
    data.table(col = names(dt))
}

# Long table with one row per (variable name, file name) pair, in file order
asm.variable.names <- rbindlist(Map(function(dt, n) {
    data.table(varname = asm.col.names(dt)$col, fn = n)
}, asm.data, asm.files))

# Cast into a wide 0/1 table: rows are file names, columns are variables, and
# a 1 means the file contains that variable (handy for looking up which file
# carries which measure)
asm.variable.names[, n := 1]
asm.variable.names.w <- dcast(asm.variable.names,
                              fn ~ varname,
                              value.var = "n",
                              fill = 0)
# Only the wide lookup table is kept
rm(asm.variable.names, asm.data, asm.col.names, asm.files)

# For each NAICS code, Year, and US Aggregation, obtain
#   EMP,                   # Employees
#   EMPAVTP,               # Employees (Average Total Production Workers)
#   HOURS,                 # Production Hours (1000)
#   PAYANN,                # Payroll ($1000)
#   PAYANPW,               # Production Workers Payroll ($1000)
#   VALADD,                # Value Added ($1000)
#   CSTMTOT,               # Cost of Materials ($1000)
#   RCPTOT,                # Total Value of Shipments ($1000)
#   CEXTOT,                # Capital Expenditures ($1000)
#   INVTOTE                # Total Inventories-Year End ($1000)
#

# Build asm.data.all by stacking one slice per year, newest first. Every slice
# has the columns YEAR, fn, NAICS, NAICS_BASE (NAICS vintage label) and the
# nine measures listed above. Rows are restricted to GEOTYPE==1 (presumably
# the U.S.-level aggregate referred to in the header comment -- confirm).
# 2016: am1631gs101.dat
asm.data.all<-read.asm.file("am1631gs101.dat")
asm.data.all <- asm.data.all[GEOTYPE==1 & YEAR==2016,.(YEAR,fn,NAICS=NAICS2012,NAICS_BASE="2012",
                                                       EMP,PAYANN,PAYANPW,HOURS,
                                                       VALADD,CSTMTOT,RCPTOT,CEXTOT,INVTOTE)]
# 2015: am1531gs101.dat
temp<-read.asm.file("am1531gs101.dat"); 
asm.data.all <- rbind(asm.data.all, temp[GEOTYPE==1 & YEAR==2015,.(YEAR,fn,NAICS=NAICS2012,NAICS_BASE="2012",
                                                                   EMP,PAYANN,PAYANPW,HOURS,
                                                                   VALADD,CSTMTOT,RCPTOT,CEXTOT,INVTOTE)])
# 2014: am1431gs101.dat
temp<-read.asm.file("am1431gs101.dat"); 
asm.data.all <- rbind(asm.data.all, temp[GEOTYPE==1 & YEAR==2014,.(YEAR,fn,NAICS=NAICS2012,NAICS_BASE="2012",
                                                                   EMP,PAYANN,PAYANPW,HOURS,
                                                                   VALADD,CSTMTOT,RCPTOT,CEXTOT,INVTOTE)])
# 2013: am1331gs101.dat
temp<-read.asm.file("am1331gs101.dat"); 
asm.data.all <- rbind(asm.data.all, temp[GEOTYPE==1 & YEAR==2013,.(YEAR,fn,NAICS=NAICS2012,NAICS_BASE="2012",
                                                                   EMP,PAYANN,PAYANPW,HOURS,
                                                                   VALADD,CSTMTOT,RCPTOT,CEXTOT,INVTOTE)])
# 2012: ec1231sg1.dat
# Unlike the am* files, this file is summed within YEAR/fn/NAICS2012 before
# stacking (it presumably holds several rows per NAICS code -- confirm).
# Rows with a missing NAICS code are dropped.
temp<-read.asm.file("ec1231sg1.dat"); 
temp1<- temp[GEOTYPE==1 & YEAR==2012,
             .(NAICS_BASE="2012",EMP=sum(EMP),PAYANN=sum(PAYANN),PAYANPW=sum(PAYANPW),HOURS=sum(HOURS),
               VALADD=sum(VALADD),CSTMTOT=sum(CSTMTOT),RCPTOT=sum(RCPTOT),CEXTOT=sum(CEXTOT),INVTOTE=sum(INVTOTE)), 
             keyby=.(YEAR, fn, NAICS2012)]
setnames(temp1, "NAICS2012", "NAICS")
asm.data.all <- rbind(asm.data.all, temp1[!is.na(NAICS)]); rm(temp,temp1)

# 2011: am1131gs101.dat (NAICS code comes from the NAICS2008 column)
temp<-read.asm.file("am1131gs101.dat"); 
asm.data.all <- rbind(asm.data.all, temp[GEOTYPE==1 & YEAR==2011,.(YEAR,fn,NAICS=NAICS2008,NAICS_BASE="2008",
                                                                   EMP,PAYANN,PAYANPW,HOURS,
                                                                   VALADD,CSTMTOT,RCPTOT,CEXTOT,INVTOTE)])
# 2010: am1031gs101.dat
temp<-read.asm.file("am1031gs101.dat"); 
asm.data.all <- rbind(asm.data.all, temp[GEOTYPE==1 & YEAR==2010,.(YEAR,fn,NAICS=NAICS2008,NAICS_BASE="2008",
                                                                   EMP,PAYANN,PAYANPW,HOURS,
                                                                   VALADD,CSTMTOT,RCPTOT,CEXTOT,INVTOTE)])
# 2009: am0931gs101.dat
# NOTE(review): the code column is NAICSASM (as for 2008 below) but the vintage
# label is "2008", whereas the 2008 slice is labelled "ASM" -- confirm which
# label is intended.
temp<-read.asm.file("am0931gs101.dat"); 
asm.data.all <- rbind(asm.data.all, temp[GEOTYPE==1 & YEAR==2009,.(YEAR,fn,NAICS=NAICSASM,NAICS_BASE="2008",
                                                                   EMP,PAYANN,PAYANPW,HOURS,
                                                                   VALADD,CSTMTOT,RCPTOT,CEXTOT,INVTOTE)])
# 2008: am0831gs101.dat
temp<-read.asm.file("am0831gs101.dat"); 
asm.data.all <- rbind(asm.data.all, temp[GEOTYPE==1 & YEAR==2008,.(YEAR,fn,NAICS=NAICSASM,NAICS_BASE="ASM",
                                                                   EMP,PAYANN,PAYANPW,HOURS,
                                                                   VALADD,CSTMTOT,RCPTOT,CEXTOT,INVTOTE)])
# 2007: ec0731sg2.dat
# Summed within YEAR/fn/NAICS2007 before stacking, like the 2012 slice; rows
# with a missing NAICS code are dropped.
temp<-read.asm.file("ec0731sg2.dat"); 
temp1<- temp[GEOTYPE==1 & YEAR==2007,
             .(NAICS_BASE="2007",EMP=sum(EMP),PAYANN=sum(PAYANN),PAYANPW=sum(PAYANPW),HOURS=sum(HOURS),
               VALADD=sum(VALADD),CSTMTOT=sum(CSTMTOT),RCPTOT=sum(RCPTOT),CEXTOT=sum(CEXTOT),INVTOTE=sum(INVTOTE)), 
             keyby=.(YEAR, fn, NAICS2007)]
setnames(temp1, "NAICS2007", "NAICS")
asm.data.all <- rbind(asm.data.all, temp1[!is.na(NAICS)]); rm(temp,temp1)


# 2006: am0631gs101.dat
# This file uses different column names: EMPSMAO -> EMP, HRSTOTM -> HOURS,
# VALADDM -> VALADD.
temp<-read.asm.file("am0631gs101.dat"); 
asm.data.all <- rbind(asm.data.all, temp[GEOTYPE==1 & YEAR==2006,.(YEAR,fn,NAICS=NAICS2003,NAICS_BASE="2003",
                                                                   EMP=EMPSMAO,PAYANN,PAYANPW,HOURS=HRSTOTM,
                                                                   VALADD=VALADDM,CSTMTOT,RCPTOT,CEXTOT,INVTOTE)])
# 2005: am0531gs102.dat, am0531gs105.dat, am0531gs106.dat
# For 2003-2005 the measures are spread over three files: *gs102 supplies the
# main measures (payroll columns are named PAYANT / PAYANTP here), *gs105
# supplies CEXTOT and *gs106 supplies INVTOTE. The latter two are left-joined
# on NAICS, so industries missing from them get NA. rbind() matches columns by
# name, so the column order produced by merge() does not matter.
temp<-read.asm.file("am0531gs102.dat"); temp<-temp[GEOTYPE==1 & YEAR==2005,.(YEAR,fn,NAICS=NAICS2003,NAICS_BASE="2003",
                                                                             EMP,PAYANN=PAYANT,PAYANPW=PAYANTP,HOURS,
                                                                             VALADD,CSTMTOT,RCPTOT)]
temp1<-read.asm.file("am0531gs105.dat");temp1<-temp1[GEOTYPE==1 & YEAR==2005,.(NAICS=NAICS2003,CEXTOT)]
temp2<-read.asm.file("am0531gs106.dat");temp2<-temp2[GEOTYPE==1 & YEAR==2005,.(NAICS=NAICS2003,INVTOTE)]
temp<-merge(temp,temp1,all.x=TRUE,by="NAICS"); rm(temp1)
temp<-merge(temp,temp2,all.x=TRUE,by="NAICS"); rm(temp2)
asm.data.all <- rbind(asm.data.all, temp)

# 2004: am0431gs102.dat, am0431gs105.dat, am0431gs106.dat
# VALADD is cast to integer64 before stacking (presumably to match the column
# type already in asm.data.all -- confirm).
temp<-read.asm.file("am0431gs102.dat"); temp<-temp[GEOTYPE==1 & YEAR==2004,.(YEAR,fn,NAICS=NAICS2003,NAICS_BASE="2003",
                                                                             EMP,PAYANN=PAYANT,PAYANPW=PAYANTP,HOURS,
                                                                             VALADD=as.integer64(VALADD),CSTMTOT,RCPTOT)]
temp1<-read.asm.file("am0431gs105.dat"); temp1<-temp1[GEOTYPE==1 & YEAR==2004,.(NAICS=NAICS2003,CEXTOT)]
temp2<-read.asm.file("am0431gs106.dat"); temp2<-temp2[GEOTYPE==1 & YEAR==2004,.(NAICS=NAICS2003,INVTOTE)]
temp<-merge(temp,temp1,all.x=TRUE,by="NAICS"); rm(temp1)
temp<-merge(temp,temp2,all.x=TRUE,by="NAICS"); rm(temp2)
asm.data.all <- rbind(asm.data.all, temp)

# 2003: am0331gs102.dat, am0331gs105.dat, am0331gs106.dat
# VALADD, CSTMTOT and RCPTOT are all cast to integer64 for this year.
temp<-read.asm.file("am0331gs102.dat"); temp<-temp[GEOTYPE==1 & YEAR==2003,.(YEAR,fn,NAICS=NAICS2003,NAICS_BASE="2003",
                                                                             EMP,PAYANN=PAYANT,PAYANPW=PAYANTP,HOURS,
                                                                             VALADD=as.integer64(VALADD),
                                                                             CSTMTOT=as.integer64(CSTMTOT),
                                                                             RCPTOT=as.integer64(RCPTOT))]
temp1<-read.asm.file("am0331gs105.dat");temp1<-temp1[GEOTYPE==1 & YEAR==2003,.(NAICS=NAICS2003,CEXTOT)]
temp2<-read.asm.file("am0331gs106.dat");temp2<-temp2[GEOTYPE==1 & YEAR==2003,.(NAICS=NAICS2003,INVTOTE)]
temp<-merge(temp,temp1,all.x=TRUE,by="NAICS"); rm(temp1)
temp<-merge(temp,temp2,all.x=TRUE,by="NAICS"); rm(temp2)
asm.data.all <- rbind(asm.data.all, temp); rm(temp)

# 2002: ec0231sg102.dat (INVTOTE is not loaded for this year and is set to NA)
# NOTE(review): PAYANPW (production workers payroll) is filled from EMPAVTP,
# which the variable list above describes as the average number of production
# workers; the 2003-2005 slices use PAYANTP for this column. Confirm whether
# PAYANTP was intended here -- WAGERATE (PAYANPW/HOURS) for 2002 depends on it.
temp<-read.asm.file("ec0231sg102.dat"); temp<-temp[GEOTYPE==1 & YEAR==2002,.(YEAR,fn,NAICS=NAICS2002,NAICS_BASE="2002",
                                                                             EMP,PAYANN=PAYANT,PAYANPW=EMPAVTP,HOURS,
                                                                             VALADD=as.integer64(VALADD),
                                                                             CSTMTOT=as.integer64(CSTMTOT),
                                                                             RCPTOT=as.integer64(RCPTOT),
                                                                             CEXTOT,INVTOTE=NA)]
# Disabled: attempt to pull INVTOTE for 2002 from ec0231sg104.dat
#temp1<-read.asm.file("ec0231sg104.dat"); temp1[GEOTYPE==1 & YEAR==2002,.(NAICS2002,NAICS_BASE="2002",INVTOTE)]
#temp<-merge(temp,temp1,all.x=TRUE,by="NAICS"); rm(temp1)
asm.data.all <- rbind(asm.data.all, temp); rm(temp)

# 2001-1997: from parsed PDF of "Statistics of Industry Groups and Industries: 2001"
# All rows of the CSV are used (no GEOTYPE/YEAR filter); capital expenditures
# and inventories are not available from this source and are set to NA.
temp <- read.asm.file("m01as-anospace.csv"); temp<-temp[,.(YEAR,fn,NAICS,NAICS_BASE="1997",EMP=as.integer(EMP),PAYANN=as.integer(PAYANN),
                                                           PAYANPW=as.integer(PAYANPW),HOURS=as.integer(HOURS),
                                                           VALADD=as.integer64(VALADD),CSTMTOT=as.integer64(CSTMTOT),
                                                           RCPTOT=as.integer64(RCPTOT),CEXTOT=NA,INVTOTE=NA)]
asm.data.all <- rbind(asm.data.all, temp); rm(temp)
# Once everything is stacked, convert the three value columns that were cast
# to integer64 above to ordinary doubles for the ratio calculations below.
asm.data.all[,VALADD := as.numeric(VALADD)]
asm.data.all[,CSTMTOT := as.numeric(CSTMTOT)]
asm.data.all[,RCPTOT := as.numeric(RCPTOT)]



##############################################################################################################
# Consolidate ASM Data to NAICS4-year                                                                        #
##############################################################################################################
# NAICS4-year panel: rows already reported at the 4-digit level are taken as
# they are; for 2007 and 2012 the 6-digit rows are additionally summed up to
# their 4-digit prefix and appended.
# NOTE(review): if the 2007/2012 source files also contain 4-digit rows, those
# years appear twice per industry after this step -- confirm.
asm.data.n4 <- rbind(
    asm.data.all[nchar(NAICS) == 4],
    asm.data.all[nchar(NAICS) == 6 & YEAR %in% c(2007, 2012),
                 lapply(.SD, sum),
                 keyby = .(YEAR, fn, NAICS = substr(NAICS, 1, 4), NAICS_BASE),
                 .SDcols = c("EMP", "PAYANN", "PAYANPW", "HOURS", "VALADD",
                             "CSTMTOT", "RCPTOT", "CEXTOT", "INVTOTE")]
)


##############################################################################################################
# Load Compustat Data                                                                                        #
##############################################################################################################
# Compustat annual file; the historical NAICS code (naicsh) is read as text
compustat <- fread(paste0(compustat.data.dir, "compustatfrom1993debt.csv"),
                   colClasses = c(naicsh = "character"))
# best.n4: first four characters of naicsh when it has at least four,
# otherwise the first four characters of naics, otherwise NA
compustat[, best.n4 := {
    current.n4 <- ifelse(nchar(naics) >= 4, substr(naics, 1, 4), NA)
    ifelse(nchar(naicsh) >= 4, substr(naicsh, 1, 4), current.n4)
}]
# Export Compustat Identifiers for JAR data replication policy
fwrite(compustat[, .(gvkey, fyear)],
       "C:/Users/omartian/GO/Identifiers/CompustatIdentifiers.csv")

# Compustat segment file
compustat.seg <- fread(paste0(compustat.data.dir, "compsegment.csv"))
# Restated segments show up once per source date. Sort so the latest srcdate
# comes first within gvkey/sid/datadate, keep only that row, then drop rows
# without a sales figure.
setorder(compustat.seg, gvkey, sid, datadate, -srcdate)
compustat.seg <- compustat.seg[, .SD[1], by = .(gvkey, sid, datadate)][!is.na(sales)]
# Export Compustat Segment Identifiers for JAR data replication policy
fwrite(compustat.seg[, .(gvkey, sid, datadate)],
       "C:/Users/omartian/GO/Identifiers/CompustatSegmentIdentifiers.csv")


# Attach the fiscal year from the annual file (inner join on gvkey/datadate)
compustat.seg <- merge(compustat.seg,
                       unique(compustat[, .(gvkey, datadate, fyear)]),
                       by = c("gvkey", "datadate"))
# Foreign sales per firm-year: geographic segments with geotp == 3
foreign.sales <- compustat.seg[geotp == 3 & stype == "GEOSEG",
                               .(for.sales = sum(sales)),
                               keyby = .(gvkey, fyear)]
# Business-segment sales per firm-year and 4-digit segment NAICS, keeping only
# complete 4-character codes.
# NOTE(review): seg.sales is rebuilt in the business-segment section below
# before it is used anywhere.
seg.sales <- compustat.seg[stype == "BUSSEG",
                           .(seg.sales = sum(sales)),
                           keyby = .(gvkey, fyear, seg.n4 = substr(NAICSS1, 1, 4))
                           ][!is.na(seg.n4) & nchar(seg.n4) == 4]

# Firm-years without foreign segments count as zero foreign sales; domestic
# sales are total sales minus foreign sales
compustat <- merge(compustat, foreign.sales,
                   by = c("gvkey", "fyear"), all.x = TRUE)
compustat[, foreign.sales := ifelse(is.na(for.sales), 0, for.sales)]
compustat[, for.sales := NULL]
compustat[, domestic.sales := sale - foreign.sales]
# Collapse Compustat to fiscal year x 4-digit NAICS for firms with loc "USA"
# and currency "USD" that have an industry code. Every total is multiplied by
# 1000 (presumably to put Compustat's units on the same scale as the ASM
# figures -- confirm); missing firm values are skipped.
compustat.agg.n4 <- compustat[
    loc == "USA" & curcd == "USD" & !is.na(best.n4),
    .(FIRMS         = .N,
      SALE          = 1000 * sum(sale,           na.rm = TRUE),
      DOMESTIC.SALE = 1000 * sum(domestic.sales, na.rm = TRUE),
      COGS          = 1000 * sum(cogs,           na.rm = TRUE),
      NI            = 1000 * sum(ni,             na.rm = TRUE),
      CAPX          = 1000 * sum(capx,           na.rm = TRUE),
      DT            = 1000 * sum(dt,             na.rm = TRUE),
      AT            = 1000 * sum(at,             na.rm = TRUE),
      EMP.Compustat = 1000 * sum(emp,            na.rm = TRUE)),
    keyby = .(fyear, best.n4)
]
# Use the same year column name as the ASM data
setnames(compustat.agg.n4, "fyear", "YEAR")

# Now take care of business segment adjustments
## Pick the newest segments for a year: for every firm and fiscal year keep
## only the most recent source date among its business segments.
## (Grouping by gvkey, fyear AND srcdate, as was done before, returns one row
## per source date, so every source date survived the merge below and nothing
## was actually filtered.)
newest.seg.date <- compustat.seg[stype == "BUSSEG",
                                 .(srcdate = sort(srcdate, decreasing = TRUE, na.last = TRUE)[1]),
                                 keyby = .(gvkey, fyear)]
## Segment sales by firm, fiscal year and 4-digit segment NAICS, restricted to
## the rows reported on that newest source date
seg.sales <- merge(newest.seg.date, compustat.seg, by = c("gvkey", "fyear", "srcdate"))[
    stype == "BUSSEG",
    .(seg.sales = sum(sales)),
    keyby = .(gvkey, fyear, seg.n4 = substr(NAICSS1, 1, 4))]
## One row per firm-year-segment; firms without segment rows keep a single row
## with NA segment fields
compustat.merged.bs <- merge(compustat, seg.sales, by = c("gvkey", "fyear"), all.x = TRUE)[
    loc == "USA" & curcd == "USD",
    .(gvkey, fyear, best.n4, seg.n4, sale, seg.sales)]
## Fall back to the firm-level industry / sales where segment data is missing
compustat.merged.bs[, best.n4.incseg := ifelse(is.na(seg.n4), best.n4, seg.n4)]
compustat.merged.bs[, best.n4.incseg.sales := ifelse(is.na(seg.sales), sale, seg.sales)]
compustat.merged.bs <- compustat.merged.bs[!is.na(best.n4.incseg)]
## Keep 4-character codes in the manufacturing range. best.n4.incseg is text,
## so R compares it with "3100"/"3400" as strings; that is safe here because
## all codes are exactly four characters long.
compustat.merged.bs <- compustat.merged.bs[nchar(best.n4.incseg) == 4 & best.n4.incseg > 3100 & best.n4.incseg < 3400]
## BSEG.FIRMS counts firm-segment rows; BSEG.SALE is scaled by 1000 like the
## other Compustat totals
compustat.merge.bs.agg <- compustat.merged.bs[, .(BSEG.FIRMS = .N,
                                                  BSEG.SALE  = 1000 * sum(best.n4.incseg.sales, na.rm = TRUE)),
                                              keyby = .(fyear, best.n4.incseg)]
setnames(compustat.merge.bs.agg, "fyear", "YEAR")
setnames(compustat.merge.bs.agg, "best.n4.incseg", "best.n4")

rm(compustat.seg, compustat.merged.bs, newest.seg.date)

##############################################################################################################
# Merge ASM, Compustat, and Import/Export data - first to industry-year aggregate                            #
##############################################################################################################
# Collapse imports to year x 4-digit NAICS for codes in the 31xx-33xx range.
# naics is compared with 3100/3400 as text here, which works on the leading
# characters of the code.
aggregated.imports.n4 <- import.data[
    nchar(naics) >= 4 & naics > 3100 & naics < 3400,
    .(imports             = sum(consumption.value),
      dutiable.imports    = sum(dutiable.value),
      tariff              = sum(duty),
      dutiable.percentage = 100 * sum(dutiable.value) / sum(consumption.value)),
    keyby = .(year, n4 = substr(naics, 1, 4))
]
# Average duty rates (in percent), over all imports and over dutiable imports
aggregated.imports.n4[, `:=`(
    ave.duty.rate.overall  = 100 * tariff / imports,
    ave.duty.rate.dutiable = 100 * tariff / dutiable.imports
)]
setnames(aggregated.imports.n4, "year", "YEAR")

# Left-join everything onto the ASM industry-years
combined.data.n4 <- merge(asm.data.n4, compustat.agg.n4,
                          by.x = c("YEAR", "NAICS"),
                          by.y = c("YEAR", "best.n4"),
                          all.x = TRUE)
combined.data.n4 <- merge(combined.data.n4, aggregated.imports.n4,
                          by.x = c("YEAR", "NAICS"),
                          by.y = c("YEAR", "n4"),
                          all.x = TRUE)
combined.data.n4 <- merge(combined.data.n4, compustat.merge.bs.agg,
                          by.x = c("YEAR", "NAICS"),
                          by.y = c("YEAR", "best.n4"),
                          all.x = TRUE)

# Industry-years without a Compustat match, or without imports, get 0 instead
# of NA. Each column is replaced as a whole so that integer counts become
# doubles exactly as with a plain `x[is.na(x)] <- 0`.
for (zero.fill.col in c("FIRMS", "SALE", "DOMESTIC.SALE", "BSEG.FIRMS",
                        "BSEG.SALE", "COGS", "NI", "CAPX", "DT", "AT",
                        "EMP.Compustat", "imports")) {
    zero.filled <- combined.data.n4[[zero.fill.col]]
    zero.filled[is.na(zero.filled)] <- 0
    set(combined.data.n4, j = zero.fill.col, value = zero.filled)
}
rm(zero.fill.col, zero.filled)

# Export the industry-year identifiers of the combined data for the JAR data
# replication policy
fwrite(combined.data.n4[, .(YEAR, NAICS)],
       "C:/Users/omartian/GO/Identifiers/CombinedDataIdentifiers.csv")

# The inputs to the merge are no longer needed
rm(aggregated.imports.n4, compustat.agg.n4, asm.data.n4, compustat.merge.bs.agg)



##############################################################################################################
# Calculate Public firm Prominence measures                                                                  #
##############################################################################################################
# Imports in thousands
combined.data.n4[, imports.K := imports / 1000]
# Public-firm presence: business-segment sales of Compustat firms relative to
# the industry's total value of shipments (NA when shipments are zero)
combined.data.n4[, alt2.public.presence := {
    shipments <- as.numeric(RCPTOT)
    ifelse(RCPTOT == 0, NA, BSEG.SALE / shipments)
}]
# Winsorized version (default 1% in each tail)
combined.data.n4[, W.alt2.public.presence := winsor(alt2.public.presence)]

##############################################################################################################
# Calculate Concentration Variables and other ASM-based controls                                             #
##############################################################################################################
# Economic Census concentration tables for 2002, 2007 and 2012, stacked
conc <- rbind(fread(paste0(concentration.dir, "ECN_2002_US_31SR12_with_ann.csv")),
              fread(paste0(concentration.dir, "ECN_2007_US_31SR12_with_ann.csv")),
              fread(paste0(concentration.dir, "ECN_2012_US_31SR2_with_ann.csv")))
# Keep the 4-digit industries and the columns that are used below
conc <- conc[nchar(NAICS.id) == 4,
             .(YEAR = YEAR.id,
               NAICS = NAICS.id,
               COMPANY,
               RCPTOT,
               CCORCPPCT,
               CONC.MEASURE = `CONCENFI.display-label`)]
conc[, c("COMPANY", "RCPTOT", "CCORCPPCT") := lapply(.SD, as.numeric),
     .SDcols = c("COMPANY", "RCPTOT", "CCORCPPCT")]
# Short labels for the "N largest companies" rows; every other label is
# treated as the all-companies row
conc[, CONC.MEASURE := ifelse(
    CONC.MEASURE == "4 largest companies", "pct4",
    ifelse(CONC.MEASURE == "8 largest companies", "pct8",
           ifelse(CONC.MEASURE == "20 largest companies", "pct20",
                  ifelse(CONC.MEASURE == "50 largest companies", "pct50",
                         "All companies"))))]
# One row per industry-year: company count and shipments from the
# all-companies row, plus one column per concentration share
conc <- merge(conc[CONC.MEASURE == "All companies", .(YEAR, NAICS, COMPANY, RCPTOT)],
              dcast(conc[CONC.MEASURE != "All companies", .(YEAR, NAICS, CONC.MEASURE, CCORCPPCT)],
                    YEAR + NAICS ~ CONC.MEASURE,
                    value.var = "CCORCPPCT"),
              by = c("YEAR", "NAICS"))
conc[, ave.company.size := RCPTOT / COMPANY]

# Fill in the non-census years linearly. Annual slopes are taken between the
# census years (m1: 2002 -> 2007, m2: 2007 -> 2012). Years before 2007 follow
# the m1 line through the 2007 value and years from 2007 on follow the m2
# line, so years outside 2002-2012 are extrapolated.
concslopes <- dcast(conc, NAICS ~ YEAR,
                    value.var = c("COMPANY", "RCPTOT", "pct20", "pct4", "pct50", "pct8", "ave.company.size"))
conc.vars <- c("ave.company.size", "pct4", "pct8", "pct20", "pct50")
for (conc.var in conc.vars) {
    level.2002 <- concslopes[[paste0(conc.var, "_2002")]]
    level.2007 <- concslopes[[paste0(conc.var, "_2007")]]
    level.2012 <- concslopes[[paste0(conc.var, "_2012")]]
    concslopes[, (paste0("m1_", conc.var)) := (level.2007 - level.2002) / 5]
    concslopes[, (paste0("m2_", conc.var)) := (level.2012 - level.2007) / 5]
}
# Every year in the combined data crossed with every industry that has slopes
conc.plugged <- CJ(YEAR = unique(combined.data.n4$YEAR), NAICS = concslopes$NAICS)
conc.plugged <- merge(conc.plugged, concslopes, by = "NAICS")
for (conc.var in conc.vars) {
    level.2007      <- conc.plugged[[paste0(conc.var, "_2007")]]
    slope.before    <- conc.plugged[[paste0("m1_", conc.var)]]
    slope.after     <- conc.plugged[[paste0("m2_", conc.var)]]
    years.from.2007 <- conc.plugged$YEAR - 2007
    plugged.value <- ifelse(conc.plugged$YEAR < 2007,
                            level.2007 + (slope.before * years.from.2007),
                            level.2007 + (slope.after * years.from.2007))
    conc.plugged[, (conc.var) := plugged.value]
}
conc.plugged <- conc.plugged[, .(YEAR, NAICS, ave.company.size, pct4, pct8, pct20, pct50)]
# NAICS is text in the combined data, so convert before joining. This is an
# inner join: industry-years without concentration data are dropped.
conc.plugged[, NAICS := as.character(NAICS)]
combined.data.n4 <- merge(combined.data.n4, conc.plugged, by = c("YEAR", "NAICS"))
rm(conc, concslopes, conc.plugged,
   conc.vars, conc.var, level.2002, level.2007, level.2012,
   slope.before, slope.after, years.from.2007, plugged.value)

# Control variables from the ASM measures: shares of the total value of
# shipments, plus production-worker payroll per production hour
combined.data.n4[, `:=`(
    VALADD.perc  = VALADD / RCPTOT,
    PAYANN.perc  = PAYANN / RCPTOT,
    WAGERATE     = PAYANPW / HOURS,
    CEXTOT.perc  = CEXTOT / RCPTOT,
    INVTOTE.perc = INVTOTE / RCPTOT
)]

##############################################################################################################
# Related Party Trade Variables                                                                              #
##############################################################################################################
# Related-party import file, collapsed to year x 4-digit NAICS
relatedparty <- fread(paste0(trade.data.dir, "relatedparty.csv"))
relatedparty[, n6 := substr(naics, 1, 6)]
relatedparty.n4 <- relatedparty[, lapply(.SD, sum),
                                keyby = .(year, n4 = substr(naics, 1, 4)),
                                .SDcols = c("imp_tot", "imp_rel", "imp_non", "imp_not")]
# The 2000-2004 file is already at the NAICS4 level; its values are text with
# thousands separators and are multiplied by 1,000,000 after parsing
# (presumably reported in millions -- confirm)
relatedparty04 <- fread(paste0(trade.data.dir, "relatedparty00-04.csv"))
relatedparty04[, c("imp_tot", "imp_rel") := lapply(.SD, function(v) {
    as.integer64(gsub(",", "", v)) * 1000000
}), .SDcols = c("imp_tot", "imp_rel")]
# Stack the two sources, keeping only total and related-party imports
relatedparty.n4 <- rbind(relatedparty.n4[, .(year, n4, imp_tot, imp_rel)],
                         relatedparty04[, .(year, n4 = as.character(NAICS4), imp_tot, imp_rel)])
rm(relatedparty, relatedparty04)
setkey(relatedparty.n4, n4, year)

# Left-join onto the industry-years and compute the related-party share
combined.data.n4 <- merge(combined.data.n4, relatedparty.n4,
                          by.x = c("NAICS", "YEAR"),
                          by.y = c("n4", "year"),
                          all.x = TRUE)
rm(relatedparty.n4)
combined.data.n4[, frac.related := imp_rel / imp_tot]


##############################################################################################################
# Link in other high-income country imports                                                                  #
##############################################################################################################
# Location of the BACI trade data
baci.dir <- "C:/Users/omartian/Box/GO/ManufacturingDecline/Data/BACI/"

# Other high-income countries from Autor, Dorn, Hanson (AER 2013), as ISO
# 2-character codes, with and without Germany (DE)
oth.hi.income      <- c('AU', 'DK', 'FI', 'DE', 'JP', 'NZ', 'ES', 'CH')
oth.hi.income.noDE <- setdiff(oth.hi.income, 'DE')


# Load Peter Schott's HS-NAICS concordances (one file for imports, one for
# exports)
import.concordance <- data.table(read.dta13("C:/Users/omartian/Box/GO/ManufacturingDecline/Data/Concordances/hssicnaics_20181015/hs_sic_naics_imports_89_117_20180927.dta"))
export.concordance <- data.table(read.dta13("C:/Users/omartian/Box/GO/ManufacturingDecline/Data/Concordances/hssicnaics_20181015/hs_sic_naics_exports_89_117_20180927.dta"))
# HS6 code = leading six digits of the 10-digit commodity code. This has to
# truncate: round() pushed every code whose last four digits are >= 5000 into
# the next HS6 code.
import.concordance[, hs.6 := floor(commodity / 10000)]
export.concordance[, hs.6 := floor(commodity / 10000)]

# --- Concordance on `naics` (codes in the 31xxxx-33xxxx range) ---
import.concordance.n  <- unique(import.concordance[naics > 310000 & naics < 340000, .(year, hs.6, naics)])
export.concordance.n  <- unique(export.concordance[naics > 310000 & naics < 340000, .(year, hs.6, naics)])
naics.hs.concordance.n <- rbind(import.concordance.n[, .(year, hs.6, naics, source = "i")],
                                export.concordance.n[, .(year, hs.6, naics, source = "x")])
# When a year/HS6/NAICS link appears in both files keep a single row; sorting
# by source first means the import ("i") row is the one kept
setorder(naics.hs.concordance.n, year, hs.6, naics, source)
naics.hs.concordance.n <- naics.hs.concordance.n[, .SD[1], by = .(year, hs.6, naics)]
# Number of distinct NAICS codes each HS6 code maps to in a year
naics.hs.concordance.n <- merge(naics.hs.concordance.n,
                                naics.hs.concordance.n[, .N, by = .(year, hs.6)],
                                by = c("year", "hs.6"))
setnames(naics.hs.concordance.n, "N", "naics.per.hs")

# --- Same construction on `naicsX` ---
# The export side is now taken from export.concordance; it was previously
# built from import.concordance, so export-only links were missing.
# NOTE(review): assumes the export file carries a naicsX column like the
# import file -- confirm.
import.concordance.nX <- unique(import.concordance[naicsX > 310000 & naicsX < 340000, .(year, hs.6, naicsX)])
export.concordance.nX <- unique(export.concordance[naicsX > 310000 & naicsX < 340000, .(year, hs.6, naicsX)])
naics.hs.concordance.nX <- rbind(import.concordance.nX[, .(year, hs.6, naicsX, source = "i")],
                                 export.concordance.nX[, .(year, hs.6, naicsX, source = "x")])
setorder(naics.hs.concordance.nX, year, hs.6, naicsX, source)
naics.hs.concordance.nX <- naics.hs.concordance.nX[, .SD[1], by = .(year, hs.6, naicsX)]
naics.hs.concordance.nX <- merge(naics.hs.concordance.nX,
                                 naics.hs.concordance.nX[, .N, by = .(year, hs.6)],
                                 by = c("year", "hs.6"))
setnames(naics.hs.concordance.nX, "N", "naicsX.per.hs")

rm(import.concordance, import.concordance.n, import.concordance.nX,
   export.concordance, export.concordance.n, export.concordance.nX)

# Load Country codes (differ depending on which HS vintage using)
# One csv per HS vintage; each table is tagged with its vintage year and the five are stacked.
baci.countries <- rbindlist(Map(
    function(vintage.tag, vintage.year) {
        vintage.codes <- fread(paste0("C:/Users/omartian/Box/GO/ManufacturingDecline/Data/baci/country_code_baci_HS",
                                      vintage.tag, ".csv"))
        vintage.codes[, hs.year := vintage.year]
        vintage.codes
    },
    c("92", "96", "02", "07", "12"),
    c(1992, 1996, 2002, 2007, 2012)))
# Rename the first five columns by position
setnames(baci.countries, 1:5, c("cepii.country.code", "country.name", "abbr.country.name", "iso2", "iso3"))

# Process one yearly BACI archive into NAICS-4 bilateral trade totals.
#
# Args:
#   filename: base name (no extension) of a zip archive in `baci.dir`, e.g. "baci12_2017".
#             Characters 5-6 select the HS vintage: "92", "96", "02", "07"; anything
#             else is treated as the 2012 vintage.
# Returns:
#   A data.table keyed by (t, hs.year, naics, i.iso2, j.iso2) with tot.v = sum(v) and
#   tot.q = sum(q), where naics is the first four characters of the concordance NAICS code.
# Uses the globals baci.dir, baci.countries and naics.hs.concordance.n.
process.baci.data <- function(filename) {

    hs.vintage <- switch(substr(filename, 5, 6),
                         '92' = 1992,
                         '96' = 1996,
                         '02' = 2002,
                         '07' = 2007,
                         2012)
    csv.path <- paste0(baci.dir, "Temp/", filename, ".csv")

    # - Unzip the file
    unzip(zipfile = paste0(baci.dir, filename, ".zip"),
          exdir   = paste0(baci.dir, "Temp"))
    # - Delete the extracted .csv file when the function exits, even if a later step fails
    on.exit(unlink(csv.path), add = TRUE)

    # - Load the unzipped .csv file into a data table
    baci <- fread(csv.path)
    # Key to data:
    #   i = exporter
    #   j = importer
    #   t = year
    #   hs6 = commodity
    #   q = quantity (tons)
    #   v = value (1000s USD)

    # link in the country codes based on HS vintage (inner joins: unmatched codes drop out)
    baci[, hs.year := hs.vintage]
    baci <- merge(baci, baci.countries[, .(i = cepii.country.code, hs.year, i.iso2 = iso2)], by = c("i", "hs.year"))
    baci <- merge(baci, baci.countries[, .(j = cepii.country.code, hs.year, j.iso2 = iso2)], by = c("j", "hs.year"))
    # link in NAICS codes; the concordance row used is the one whose year equals the HS vintage year
    baci <- merge(baci, naics.hs.concordance.n[, .(year, hs.6, naics, naics.per.hs)],
                  by.x = c("hs.year", "hs6"), by.y = c("year", "hs.6"), allow.cartesian = TRUE)
    # Divide v and q by naics.per.hs - when we don't know the naics per the hs6 code because of multiple matches, split evenly
    baci[, c("v", "q") := .(v / naics.per.hs, q / naics.per.hs)]
    # Aggregate to the i,j,t,n4 level
    baci[, .(tot.v = sum(v), tot.q = sum(q)), keyby = .(t, hs.year, naics = substr(naics, 1, 4), i.iso2, j.iso2)]
}



# Run every yearly BACI archive (newest first) through process.baci.data and stack the results.
# The file prefix encodes the HS vintage that applies to each block of years.
baci.data <- rbindlist(pblapply(c(paste0("baci12_", 2017:2012),
                                  paste0("baci07_", 2011:2008),
                                  paste0("baci02_", 2007:2003),
                                  paste0("baci96_", 2002:1998),
                                  paste0("baci92_", 1997:1995)),
                                process.baci.data))


# Export BACI Identifiers for JAR data replication policy
fwrite(baci.data[, list(t, naics, i.iso2, j.iso2)], "C:/Users/omartian/GO/Identifiers/BACIIdentifiers.csv")

# Imports to high-income countries: total value and quantity by NAICS-4 and year
oth.hi.inc.imports <- baci.data[j.iso2 %in% oth.hi.income,
                                list(aggregate.v = sum(tot.v), aggregate.q = sum(tot.q)),
                                keyby = list(naics, t)]
setorder(oth.hi.inc.imports, naics, t)
# One-row leads and lags within each industry (rows are ordered by year)
oth.hi.inc.imports[, c("lead.q", "lead.v", "lag.q", "lag.v") :=
                       list(shift(aggregate.q, type = "lead"),
                            shift(aggregate.v, type = "lead"),
                            shift(aggregate.q, type = "lag"),
                            shift(aggregate.v, type = "lag")),
                   by = naics]
# Forward-looking (t -> t+1) and backward-looking (t-1 -> t) growth rates
oth.hi.inc.imports[, `:=`(q.growth.tplus1 = (lead.q - aggregate.q)/aggregate.q,
                          v.growth.tplus1 = (lead.v - aggregate.v)/aggregate.v,
                          q.growth.t      = (aggregate.q - lag.q)/lag.q,
                          v.growth.t      = (aggregate.v - lag.v)/lag.v)]
# Attach each industry's 1997 levels (inner join: industries without a 1997 row drop out)
oth.hi.inc.imports <- merge(oth.hi.inc.imports,
                            oth.hi.inc.imports[t == 1997, list(naics, q.1997 = aggregate.q, v.1997 = aggregate.v)],
                            by = "naics")
# Levels relative to 1997
oth.hi.inc.imports[, `:=`(q.scaled97 = aggregate.q/q.1997,
                          v.scaled97 = aggregate.v/v.1997)]


# Same aggregate as oth.hi.inc.imports, but over the importer list that excludes Germany
oth.hi.inc.imports.noDE <- baci.data[j.iso2 %in% oth.hi.income.noDE,
                                     list(aggregate.v = sum(tot.v), aggregate.q = sum(tot.q)),
                                     keyby = list(naics, t)]
setorder(oth.hi.inc.imports.noDE, naics, t)
# One-row leads and lags within each industry
oth.hi.inc.imports.noDE[, c("lead.q", "lead.v", "lag.q", "lag.v") :=
                            list(shift(aggregate.q, type = "lead"),
                                 shift(aggregate.v, type = "lead"),
                                 shift(aggregate.q, type = "lag"),
                                 shift(aggregate.v, type = "lag")),
                        by = naics]
# Growth rates into t+1 and into t
oth.hi.inc.imports.noDE[, `:=`(q.growth.tplus1 = (lead.q - aggregate.q)/aggregate.q,
                               v.growth.tplus1 = (lead.v - aggregate.v)/aggregate.v,
                               q.growth.t      = (aggregate.q - lag.q)/lag.q,
                               v.growth.t      = (aggregate.v - lag.v)/lag.v)]
# Attach 1997 levels (inner join on naics) and scale by them
oth.hi.inc.imports.noDE <- merge(oth.hi.inc.imports.noDE,
                                 oth.hi.inc.imports.noDE[t == 1997, list(naics, q.1997 = aggregate.q, v.1997 = aggregate.v)],
                                 by = "naics")
oth.hi.inc.imports.noDE[, `:=`(q.scaled97 = aggregate.q/q.1997,
                               v.scaled97 = aggregate.v/v.1997)]




# Link the (Germany-inclusive) high-income import series into the industry-year panel; unmatched panel rows are kept
combined.data.n4 <- merge(combined.data.n4, oth.hi.inc.imports,
                          by.x = c("NAICS", "YEAR"), by.y = c("naics", "t"),
                          all.x = TRUE)



##############################################################################################################
# Create Lags for levels specification                                                                       #
##############################################################################################################
# All lags below are one-row shifts within NAICS, so the panel must be ordered by year first
setorder(combined.data.n4, NAICS, YEAR)
combined.data.n4[, lag.W.alt2.public.presence := shift(W.alt2.public.presence), by = NAICS]
combined.data.n4[, RCPTOT := as.numeric(RCPTOT)]
combined.data.n4[, lag.RCPTOT  := shift(RCPTOT),      by = NAICS]
combined.data.n4[, lag2.RCPTOT := shift(lag.RCPTOT),  by = NAICS]
combined.data.n4[, lag3.RCPTOT := shift(lag2.RCPTOT), by = NAICS]
combined.data.n4[, lag.SALE    := shift(SALE),        by = NAICS]
combined.data.n4[, lag2.SALE   := shift(lag.SALE),    by = NAICS]
# First lags of the remaining level variables, created in this order as lag.<name>
for (lag.src in c("VALADD.perc", "PAYANN.perc", "PAYANN", "WAGERATE", "CEXTOT.perc", "INVTOTE.perc",
                  "ave.duty.rate.overall", "pct4", "pct8", "pct20", "pct50",
                  "frac.related", "imports.K")) {
    combined.data.n4[, (paste0("lag.", lag.src)) := shift(get(lag.src)), by = NAICS]
}
rm(lag.src)
combined.data.n4[, lag2.W.alt2.public.presence := shift(lag.W.alt2.public.presence), by = NAICS]
combined.data.n4[, lag.alt2.public.presence    := shift(alt2.public.presence),       by = NAICS]
combined.data.n4[, lag2.alt2.public.presence   := shift(lag.alt2.public.presence),   by = NAICS]
combined.data.n4[, lag.aggregate.v             := shift(aggregate.v),                by = NAICS]

# Blank out lagged imports for 1998 rows
combined.data.n4[, lag.imports.K := ifelse(YEAR==1998, NA, lag.imports.K)]

# Growth rates: (later - earlier) / earlier
combined.data.n4[, RCPTOT.growth     := (RCPTOT     - lag.RCPTOT )/lag.RCPTOT]
combined.data.n4[, lag.RCPTOT.growth := (lag.RCPTOT - lag2.RCPTOT)/lag2.RCPTOT]
combined.data.n4[, lag.SALE.growth   := (lag.SALE   - lag2.SALE  )/lag2.SALE]

# create double lags for changes specification and changes
# (lag2.W.alt2.public.presence and lag2.RCPTOT are already built with the first lags
#  in the levels section above, so they are not recomputed here)
combined.data.n4[, lag2.ave.duty.rate.overall := shift(lag.ave.duty.rate.overall), by = NAICS]
combined.data.n4[, lag2.VALADD.perc           := shift(lag.VALADD.perc),           by = NAICS]
combined.data.n4[, lag2.PAYANN.perc           := shift(lag.PAYANN.perc),           by = NAICS]
combined.data.n4[, lag2.WAGERATE              := shift(lag.WAGERATE),              by = NAICS]
combined.data.n4[, lag2.CEXTOT.perc           := shift(lag.CEXTOT.perc),           by = NAICS]
combined.data.n4[, lag2.INVTOTE.perc          := shift(lag.INVTOTE.perc),          by = NAICS]
combined.data.n4[, lag2.pct4                  := shift(lag.pct4),                  by = NAICS]
combined.data.n4[, lag2.pct8                  := shift(lag.pct8),                  by = NAICS]
combined.data.n4[, lag2.pct20                 := shift(lag.pct20),                 by = NAICS]
combined.data.n4[, lag2.pct50                 := shift(lag.pct50),                 by = NAICS]
# Growth from t-3 to t-2, using the same (later - earlier)/earlier convention as
# lag.RCPTOT.growth. The numerator used to be (lag3 - lag2), which flipped the sign and
# made d.lag.RCPTOT.growth effectively a sum of growth rates rather than a difference.
combined.data.n4[, lag2.RCPTOT.growth         := (lag2.RCPTOT - lag3.RCPTOT)/lag3.RCPTOT]

# Deeper lags of the public presence measures
combined.data.n4[, lag3.W.alt2.public.presence := shift(lag2.W.alt2.public.presence), by = NAICS]

combined.data.n4[, lag3.alt2.public.presence   := shift(lag2.alt2.public.presence),   by = NAICS]
combined.data.n4[, lag4.W.alt2.public.presence := shift(lag3.W.alt2.public.presence), by = NAICS]
combined.data.n4[, lag5.W.alt2.public.presence := shift(lag4.W.alt2.public.presence), by = NAICS]
combined.data.n4[, lag.BSEG.SALE               := shift(BSEG.SALE),                   by = NAICS]
combined.data.n4[, lag2.BSEG.SALE              := shift(lag.BSEG.SALE),               by = NAICS]




##############################################################################################################
# Standardize Variables                                                                                      #
##############################################################################################################
# Standardize each lagged variable with scale() over the whole panel; the result is stored as S.<name>
for (std.src in c("lag.W.alt2.public.presence",
                  "lag.ave.duty.rate.overall",
                  "lag.RCPTOT.growth",
                  "lag.VALADD.perc",
                  "lag.PAYANN.perc",
                  "lag.WAGERATE",
                  "lag.CEXTOT.perc",
                  "lag.INVTOTE.perc",
                  "lag.pct20")) {
    combined.data.n4[, (paste0("S.", std.src)) := scale(get(std.src))]
}
rm(std.src)





##############################################################################################################
# Changes                                                                                                    #
##############################################################################################################
# Change in import competition net of the related fraction: current-period term minus lagged term
combined.data.n4[, d.import.comp.no.rp := {
    current.term <- (1 - frac.related) * winsor(imports.K/RCPTOT)
    lagged.term  <- (1 - lag.frac.related) * winsor(lag.imports.K/lag.RCPTOT)
    current.term - lagged.term
}]
# First differences of the lagged controls: d.lag.<name> = lag.<name> - lag2.<name>
for (chg.src in c("ave.duty.rate.overall", "VALADD.perc", "PAYANN.perc", "WAGERATE", "pct20", "RCPTOT.growth")) {
    combined.data.n4[, (paste0("d.lag.", chg.src)) := get(paste0("lag.", chg.src)) - get(paste0("lag2.", chg.src))]
}
rm(chg.src)


#------------------------------------------------------------------------------------------------------------#
# Control Variables From Pierce and Schott                                                                   #
#------------------------------------------------------------------------------------------------------------#

# Load the NAICS rollups
# The path is built from asm.data.dir like the other inputs; the previous hard-coded
# path had no drive letter and so depended on the current working drive.
naicsrollup <- fread(paste0(asm.data.dir, "NAICSRollups.csv"))
# Reshape to one row per (Rollup, member code), dropping the empty cells
naicsrollup.long <- melt(naicsrollup, id.vars = "Rollup"); rm(naicsrollup)
naicsrollup.long <- naicsrollup.long[!is.na(value)]
naicsrollup.long[, NAICS := as.character(value)]
naicsrollup.long[, value := NULL]

# link in the Pierce and Schott manufacturing "families" and the associated data
ps.families <- data.table(read.dta13(paste0(PS.data.dir, "bbg_fam_drop_50_n6_2.dta")))
ps.families[, n6 := as.character(n6)]
naicsrollup.long <- merge(naicsrollup.long, ps.families, all.x = TRUE, by.x = "NAICS", by.y = "n6")
# for instances where an ASM rollup covers multiple product families, collapse the family to the "lowest" number
# na.last = TRUE: setorder() puts NA first by default, which would make an unmatched
# member code (NA fam50 from the left join above) win over every real family number.
setorder(naicsrollup.long, Rollup, fam50, na.last = TRUE)
naicsrollup.long <- merge(naicsrollup.long,
                          naicsrollup.long[, .SD[1, .(lowest.fam50 = fam50, lowest.con50 = con50)], keyby = Rollup],
                          by = "Rollup")

# One row per rollup, carrying its lowest family; append to the family table as an extra n6 entry
naicsrollup <- naicsrollup.long[, .SD[1, .(fam50 = lowest.fam50, con50 = lowest.con50)], by = Rollup]
setnames(naicsrollup, "Rollup", "n6")
ps.families <- rbind(naicsrollup, ps.families[, .(n6, fam50, con50)])
setorder(ps.families, fam50, n6)
ps.gaps            <- data.table(read.dta13(paste0(PS.data.dir, "gaps_by_naics6_20150722_fam50.dta"     ))) #variable s1999           -i
# Attach s1999 by family, then average it to the first four characters of n6
ntr.gap.data <- merge(ps.families, ps.gaps[, .(fam50, s1999)], by = "fam50")
ntr.gap.data <- ntr.gap.data[, .(s1999mean = mean(s1999, na.rm = TRUE)), keyby = .(n4 = substr(n6, 1, 4))]
ntr.gap.data[, S.s1999mean := scale(s1999mean)]
combined.data.n4 <- merge(combined.data.n4, ntr.gap.data, all.x = TRUE, by.x = "NAICS", by.y = "n4")
# Interaction terms: the (standardized / raw) s1999 mean from 2001 onwards, zero before
combined.data.n4[, post_ntr    := ifelse(YEAR >= 2001, S.s1999mean, 0)]
combined.data.n4[, post_ntr_ns := ifelse(YEAR >= 2001, s1999mean, 0)]


# Recompute the lag of aggregate.v on the merged table
combined.data.n4[, lag.aggregate.v := shift(.SD), by = NAICS, .SDcols = "aggregate.v"]


##############################################################################################################
# Rescale key explanatory variables                                                                          #
##############################################################################################################
# Keep only rows where the lagged public presence variable is available
combined.data.n4 <- combined.data.n4[!is.na(lag.W.alt2.public.presence)]
# Demean lagged public presence within NAICS and append it as demeaned.lag.W.alt2.public.presence
# (cbind relies on demeanlist returning rows in the same order as the input)
combined.data.n4 <- cbind(
    combined.data.n4,
    data.table(demeanlist(combined.data.n4[, list(demeaned.lag.W.alt2.public.presence = lag.W.alt2.public.presence)],
                          list(factor(combined.data.n4$NAICS)))))
combined.data.n4[, S.demeaned.lag.W.alt2.public.presence := scale(demeaned.lag.W.alt2.public.presence)]
# Attach each industry's 1998 RCPTOT (inner join: industries without a 1998 row drop out)
combined.data.n4 <- merge(combined.data.n4,
                          combined.data.n4[YEAR == 1998, list(NAICS, RCPTOT.98 = as.numeric(RCPTOT))],
                          by = "NAICS")



# Further lags of the standardized, demeaned presence measure (one-row shifts within NAICS)
setorder(combined.data.n4, NAICS, YEAR)
combined.data.n4[, S.demeaned.lag2.W.alt2.public.presence := shift(S.demeaned.lag.W.alt2.public.presence),  by = NAICS]
combined.data.n4[, S.demeaned.lag3.W.alt2.public.presence := shift(S.demeaned.lag2.W.alt2.public.presence), by = NAICS]
combined.data.n4[, S.demeaned.lag4.W.alt2.public.presence := shift(S.demeaned.lag3.W.alt2.public.presence), by = NAICS]
combined.data.n4[, S.demeaned.lag5.W.alt2.public.presence := shift(S.demeaned.lag4.W.alt2.public.presence), by = NAICS]

# output to stata for PSA test
combined.data.n4[, impcomprp := (1 - frac.related) * winsor(imports.K/RCPTOT)]
save.dta13(combined.data.n4, "C:/Users/omartian/Box/GO/ManufacturingDecline/Data/combined_data_n4.dta", convert.underscore = TRUE)






##############################################################################################################
# Event Study For Cost of Sox                                                                                #
##############################################################################################################
# 4 key time periods from Zhang paper:
#    2  - 2/01/02–2/04/02 (Friday-Monday)
#    14 - 7/08/02–7/12/02 (Monday-Friday)
#    16 - 7/18/02–7/23/02 (Thursday-Tuesday)
#    17 - 7/24/02–7/26/02 (Wednesday-Friday)
# Use for event period 12 trading days starting with event 2 (7/8/02)

# Event-window returns, the stock list, and market caps
sox.returns   <- fread(paste0(sox.event.dir, "Event2-0-12.csv"))
sox.stocks    <- fread(paste0(sox.event.dir, "StocksForEvent.csv"))
sox.marketcap <- fread(paste0(sox.event.dir, "StocksMarketCap.csv"))
# Attach HNAICS by permno (inner join: returns without a stock-list match drop out)
sox.returns <- merge(sox.returns,
                     sox.stocks[, list(PERMNO, HNAICS)],
                     by.x = "permno", by.y = "PERMNO")
# Attach market cap where one is recorded; returns without it are kept
sox.returns <- merge(sox.returns,
                     sox.marketcap[!is.na(MARKETCAP), list(PERMNO, MARKETCAP)],
                     by.x = "permno", by.y = "PERMNO", all.x = TRUE)

# Export SOX Identifiers for JAR data replication policy
fwrite(sox.returns[, list(permno)], "C:/Users/omartian/GO/Identifiers/SoxIdentifiers.csv")

# Industry = first four characters of HNAICS
sox.returns[, n4 := substr(HNAICS, 1, 4)]
# Equal-weighted mean bhar per industry in the 3100-3399 range, keeping industries with more than 9 stocks
sox.industry.returns.ew <- sox.returns[n4 >= 3100 & n4 <= 3399,
                                       list(bhar.ind = mean(bhar), .N),
                                       by = n4][N > 9]
sox.industry.returns.ew[, rank.ew := rank(bhar.ind)]

# Attach the industry event returns to the panel; industries without one are kept
sox.event.consol <- merge(combined.data.n4,
                          sox.industry.returns.ew[, list(NAICS = n4, bhar.ind.ew = bhar.ind, num_firms.ew = N, rank.ew)],
                          by = "NAICS", all.x = TRUE)

# Post-2002 interactions: standardized rank / return after 2002, zero otherwise
sox.event.consol[, rank.ew.post     := ifelse(YEAR > 2002, scale(rank.ew), 0)]
sox.event.consol[, bhar.ind.ew.post := ifelse(YEAR > 2002, scale(bhar.ind.ew), 0)]

sox.event.consol[, S.bhar.ind.ew := scale(bhar.ind.ew)]

# Keep only rows where the lagged public presence variable is available
sox.event.consol <- sox.event.consol[!is.na(lag.W.alt2.public.presence)]
# Demean lagged public presence within NAICS on this sample.
# sox.event.consol inherits a demeaned.lag.W.alt2.public.presence column from
# combined.data.n4, so cbind()-ing the fresh result produced two columns with the same
# name and the scale() below read the inherited one. Overwrite the column in place instead.
sox.presence.demeaned <- data.table(demeanlist(sox.event.consol[, .(demeaned.lag.W.alt2.public.presence = lag.W.alt2.public.presence)],
                                               list(factor(sox.event.consol$NAICS))))[[1]]
sox.event.consol[, demeaned.lag.W.alt2.public.presence := sox.presence.demeaned]
rm(sox.presence.demeaned)
sox.event.consol[, S.demeaned.lag.W.alt2.public.presence := scale(demeaned.lag.W.alt2.public.presence)]

# Import competition, net of and including the related fraction, plus standardized versions
sox.event.consol[, import.comp.no.rp     := (1 - frac.related) * winsor(imports.K/RCPTOT)]
sox.event.consol[, S.import.comp.no.rp   := scale(import.comp.no.rp)]
sox.event.consol[, import.comp.incl.rp   := winsor(imports.K/RCPTOT)]
sox.event.consol[, S.import.comp.incl.rp := scale(import.comp.incl.rp)]

# Re-standardize the lagged controls on this sample, stored as S.<name>
for (sox.std.src in c("lag.ave.duty.rate.overall", "lag.VALADD.perc", "lag.PAYANN.perc",
                      "lag.WAGERATE", "lag.pct20", "lag.RCPTOT.growth")) {
    sox.event.consol[, (paste0("S.", sox.std.src)) := scale(get(sox.std.src))]
}
rm(sox.std.src)

# Wide table with one row per NAICS: 2002 values (suffix .x after the merge) next to 2007 values (suffix .y).
# S.bhar.ind.ew is taken from the 2002 rows only.
sox.rows.2002 <- sox.event.consol[YEAR == 2002,
                                  list(import.comp.no.rp, import.comp.incl.rp, S.lag.ave.duty.rate.overall, S.lag.VALADD.perc,
                                       S.lag.PAYANN.perc, S.lag.WAGERATE, S.lag.pct20, S.lag.RCPTOT.growth, aggregate.v,
                                       S.demeaned.lag.W.alt2.public.presence, S.bhar.ind.ew, NAICS)]
sox.rows.2007 <- sox.event.consol[YEAR == 2007,
                                  list(import.comp.no.rp, import.comp.incl.rp, S.lag.ave.duty.rate.overall, S.lag.VALADD.perc,
                                       S.lag.PAYANN.perc, S.lag.WAGERATE, S.lag.pct20, S.lag.RCPTOT.growth, aggregate.v,
                                       S.demeaned.lag.W.alt2.public.presence, NAICS)]
Sox.changes <- merge(sox.rows.2002, sox.rows.2007, by = 'NAICS')
rm(sox.rows.2002, sox.rows.2007)

# NACE - NAICS concordance downloaded from https://ec.europa.eu/eurostat/ramon/relations/index.cfm?TargetUrl=LST_REL&StrLanguageCode=EN&IntCurrentPage=11 on 11/7/19
# Read with fread's default column type detection.
# NOTE(review): the same file is read again further down with the code columns forced to
# character (naics.nace.concordance) — confirm whether this copy is still needed.
NACE.NAICS.concordance <- fread("C:/Users/omartian/Box/GO/ManufacturingDecline/Data/Concordances/NACE_REV2-US_NAICS_2012.csv")


#==========================================================================================
# Process Orbis information from German Firms to get Disclosure
#==========================================================================================
# grep the file to filter out non-German observations (Run from the command line)
# grep '^DE' ~/Box/Go/ManufacturingDecline/Data/Orbis/Key_financials-USD.txt > german_key_financials-USD.txt
# grep '^DE' ~/Box/Go/ManufacturingDecline/Data/Orbis/Industry_classifications.txt > german_industry_classifications.txt
# grep '^DE' ~/Box/Go/ManufacturingDecline/Data/Orbis/Legal_info.txt > german_legal_info.txt

# Read in the German firms' financial data (13 selected columns, renamed on load)
DE.orbis.financials <- fread("C:/Users/omartian/Box/Go/ManufacturingDecline/Data/Orbis/german_key_financials-USD.txt", 
                             select=c(1,2,3,4,5,8,10,11,12,14,16,24,25),
                             col.names=c('BvD.id', 'consol.code', 'filing.type', 'date', 'months', 'source', 'original.currency', 'exchange.rate', 'sales', 'ni', 'ta', 'emps', 'market.cap'))
# Categorical fields become factors
for (fct.col in c("consol.code", "filing.type", "source", "original.currency")) {
    set(DE.orbis.financials, j = fct.col, value = factor(DE.orbis.financials[[fct.col]]))
}
rm(fct.col)
# Dates arrive as yyyymmdd
DE.orbis.financials[, date := as.Date(as.character(date), format='%Y%m%d')]
# Keep only 12-month records; the months column is then no longer needed
DE.orbis.financials <- DE.orbis.financials[months == 12]
DE.orbis.financials[, months := NULL]




# Legal form and incorporation date per firm (quote="" so quote characters are read literally)
DE.orbis.legal <- fread("C:/Users/omartian/Box/Go/ManufacturingDecline/Data/Orbis/german_legal_info.txt",
                        quote = "",
                        select = c(1,8,10),
                        col.names = c("BvD.id", "legal.form", "incorporation.date"))
DE.orbis.legal[, `:=`(legal.form         = as.factor(legal.form),
                      incorporation.date = as.Date(as.character(incorporation.date), format="%Y%m%d"))]

# Attach legal form and incorporation date to the financials; firms without legal info are kept
DE.orbis.financials <- merge(DE.orbis.financials, DE.orbis.legal, by = "BvD.id", all.x = TRUE)

# Export German Identifiers for JAR data replication policy
fwrite(DE.orbis.financials[, list(BvD.id, consol.code, filing.type, date)], "C:/Users/omartian/GO/Identifiers/GermanIdentifiers.csv")

# Remove double counting of consolidated records: keep, per firm and date, the record with the largest sales.
# na.last = TRUE is required: setorder() places NA first by default (also for descending
# columns), so a record with missing sales would otherwise be the one that is kept.
setorder(DE.orbis.financials, BvD.id, date, -sales, na.last = TRUE)
DE.orbis.financials.nodc <- DE.orbis.financials[, .SD[1], by = .(BvD.id, date)]

# Drop records with no legal form or incorporation date
DE.orbis.financials.nodc <- DE.orbis.financials.nodc[!is.na(incorporation.date) & legal.form != ""]

# Read In German Firms' industries (seven selected columns, renamed on load)
DE.orbis.industries <- fread("c:/Users/omartian/Box/Go/ManufacturingDecline/Data/Orbis/german_industry_classifications.txt", 
                             select = c(1,8,10,12,14,16,18),
                             col.names = c("BvD.id", "NACE4.core", "NACE4.primary", "NACE4.secondary", "NAICS4.core", "NAICS6.primary", "NAICS6.secondary"))
# Long format: one row per (firm, classification column), missing codes dropped
DE.orbis.industries <- melt(DE.orbis.industries, id.vars = "BvD.id", na.rm = TRUE)
# Only the NAICS4.core classification is used from here on
DE.orbis.NAICS4.core <- DE.orbis.industries[variable == "NAICS4.core", list(BvD.id, industry = value)]
rm(DE.orbis.industries)

# Attach each firm's core NAICS code (inner join) and restrict to codes in the 3100-3399 range
DE.orbis.financials.with.n6 <- merge(DE.orbis.financials.nodc, DE.orbis.NAICS4.core, by = "BvD.id")
DE.orbis.financials.with.n6[, apportioned.sales := sales]
DE.orbis.financials.with.n6 <- DE.orbis.financials.with.n6[industry >= 3100 & industry <= 3399]
DE.orbis.financials.with.n6[, industry := substr(industry, 1, 4)]
# Collapse to one row per record/industry; naics4.sales sums apportioned.sales with NAs treated as zero
DE.orbis.financials.with.n6 <- DE.orbis.financials.with.n6[, list(naics4.sales = sum(apportioned.sales, na.rm=TRUE)),
                                                           keyby = list(BvD.id, consol.code, filing.type, date, source, original.currency,
                                                                        exchange.rate, sales, ni, ta, emps, market.cap, incorporation.date, legal.form, industry)]
DE.orbis.financials.with.n6[, year := year(date)]
# Industry-year totals: all records with sales, and the subset that also has a market cap
DE.sales.all    <- DE.orbis.financials.with.n6[!is.na(naics4.sales),
                                               list(disc.firms = .N, disc.sales = sum(naics4.sales)),
                                               keyby = list(industry, year)]
DE.sales.mktcap <- DE.orbis.financials.with.n6[!is.na(naics4.sales) & !is.na(market.cap),
                                               list(public.firms = .N, public.sale = sum(naics4.sales)),
                                               keyby = list(industry, year)]
DE.aggregated.sales <- merge(DE.sales.all, DE.sales.mktcap, by = c("industry", "year"), all.x = TRUE)
rm(DE.sales.all, DE.sales.mktcap)

# Calculate an end of the year Euro-USD exchange rate:
# count how often each rate appears on 31 December EUR records (1995 onwards) ...
euro.usd.exchange.rate <- DE.orbis.financials[original.currency == "EUR" &
                                                  month(date) == 12 & mday(date) == 31 &
                                                  year(date) >= 1995,
                                              .N,
                                              keyby = list(date, exchange.rate)]
euro.usd.exchange.rate[, year := year(date)]
# ... and keep the most frequent rate for each year
setorder(euro.usd.exchange.rate, year, -N)
euro.usd.exchange.rate <- euro.usd.exchange.rate[, head(.SD, 1L), by = year]


#==========================================================================================
# Process BACI data for German Import Competition
#==========================================================================================
# Imports: BACI flows whose importer is Germany, totalled by NAICS-4 and year
German.imports <- baci.data[j.iso2 == "DE"]
German.imports.aggregated <- German.imports[, list(v = sum(tot.v), q = sum(tot.q)),
                                            keyby = list(naics, t)]

#==========================================================================================
# Process EU manufacturing production data for denominators of German import competition and disclosure
#==========================================================================================

# Load German Production
#
# Shared reader behind the four de.prod.* functions, which differ only in the workbook
# name pattern and in which column of the "Value" sheet is read as de.production.eur.
#
# Args:
#   yr:        year of the snapshot; copied into the `year` column of the result.
#   workbook:  full path of the .xlsx file to read.
#   value.col: position of the column to use as de.production.eur.
# Returns:
#   A data.table keyed by nace (first four characters of the padded prodcom code) with
#   the summed de.production.eur and the year.
.de.prod.read <- function(yr, workbook, value.col) {
    europe.prod <- data.table(read_excel(workbook, sheet = "Value"))
    europe.prod <- europe.prod[, c(1, value.col), with = FALSE]
    setnames(europe.prod, c("prodcom.code", "de.production.eur"))
    # Drop the first two rows of the sheet and any row without a product code
    europe.prod <- europe.prod[-1:-2]
    europe.prod <- europe.prod[!is.na(prodcom.code)]
    # Pad the code to width 8 before taking the first four characters
    # NOTE(review): zero-padding via "%08s" is platform-dependent (may pad with spaces);
    # the trimws()/"0"-prefix step applied to de.production afterwards appears to compensate.
    europe.prod[, nace := substr(sprintf("%08s", prodcom.code),1,4)]
    # Non-numeric cells become NA here and are then counted as zero
    europe.prod[, de.production.eur := as.numeric(de.production.eur)]
    europe.prod[is.na(europe.prod)] <- 0
    europe.prod[, .(de.production.eur = sum(de.production.eur), year = yr), keyby = nace]
}

# Snapshots for 2013-2018: value in column 10
de.prod.13.18 <- function(yr) {
    .de.prod.read(yr, paste0("C:/Users/omartian/Box/Go/ManufacturingDecline/Data/Eurostat/Website_snapshot_", yr, "_N2.xlsx"), 10)
}
# Snapshots for 2006-2012: value in column 16
de.prod.06.12 <- function(yr) {
    .de.prod.read(yr, paste0("C:/Users/omartian/Box/Go/ManufacturingDecline/Data/Eurostat/Website_snapshot_", yr, "_N2.xlsx"), 16)
}
# Snapshots for 2003-2005: value in column 19
de.prod.03.05 <- function(yr) {
    .de.prod.read(yr, paste0("C:/Users/omartian/Box/Go/ManufacturingDecline/Data/Eurostat/Website_snapshot_", yr, "_N2.xlsx"), 19)
}
# Snapshots for 1995-2002 (different file naming): value in column 10
de.prod.95.02 <- function(yr) {
    .de.prod.read(yr, paste0("C:/Users/omartian/Box/Go/ManufacturingDecline/Data/Eurostat/Website-snapshot-", yr, "-created-2009-11-09-N2.xlsx"), 10)
}

# Read every yearly snapshot with the reader that matches its layout, then stack
de.production <- rbindlist(c(lapply(2013:2018, de.prod.13.18),
                             lapply(2006:2012, de.prod.06.12),
                             lapply(2003:2005, de.prod.03.05),
                             lapply(1995:2002, de.prod.95.02)))

# Strip surrounding whitespace from the NACE code and restore a leading zero on three-character codes
temp <- trimws(de.production$nace)
de.production[, nace := ifelse(nchar(temp) == 3, paste0("0", temp), temp)]


# Convert NACE4 to NAICS4 to match up with disclosure and imports
# (columns 1 and 3, the two code columns, are read as character)
naics.nace.concordance <- fread("C:/Users/omartian/Box/GO/ManufacturingDecline/Data/Concordances/NACE_REV2-US_NAICS_2012.csv", 
                                colClasses = list('character'=c(1,3)))
setnames(naics.nace.concordance, c('nace.r2', 'nace.desc', 'naics.12', 'naics.desc', 'notes'))
# Remove the dots from the NACE code and cut the NAICS code to four characters
naics.nace.concordance[, `:=`(nace.r2 = gsub(".", "", nace.r2, fixed = TRUE),
                              naics4  = substr(naics.12, 1, 4))]
naics.nace.concordance <- unique(naics.nace.concordance[, list(nace.r2, naics4)])
# num.matches = number of distinct NAICS4 codes each NACE code maps to
nace.conversion <- merge(naics.nace.concordance,
                         naics.nace.concordance[, .N, keyby = nace.r2],
                         by = "nace.r2")
setnames(nace.conversion, 'N', 'num.matches')

# Map production onto NAICS4; a NACE code with several NAICS4 matches is split evenly across them
de.production.naics <- merge(de.production, nace.conversion,
                             by.x = 'nace', by.y = 'nace.r2',
                             allow.cartesian = TRUE)
de.production.naics[, de.production.eur := de.production.eur / num.matches]
de.production.naics <- de.production.naics[, list(de.production.eur = sum(de.production.eur)),
                                           keyby = list(year, naics4)]
# Convert to USD with the year-end rate (inner join: years without a rate drop out)
de.production.naics <- merge(de.production.naics,
                             euro.usd.exchange.rate[, list(year, exchange.rate)],
                             by = "year")
de.production.naics[, de.production.usd := de.production.eur * exchange.rate]


#==========================================================================================
# Create the dataset to run the analyses
#==========================================================================================
# Merge imports with production to get import competition
# The merge is a full outer join (all=TRUE): an industry-year present in only one of the two
# sources is kept, and its missing side is set to 0 by the NA fill below.
# NOTE(review): naics4 is produced by substr() and is therefore character, so the range test
# against 3100/3400 is a string comparison; it behaves as a numeric range only for 4-digit codes - confirm.
German.data <- merge(German.imports.aggregated[,.(naics,year=t,imports=v)],
                     de.production.naics[naics4 > 3100 & naics4 < 3400,.(naics=naics4,year,de.production.usd)],
                     by=c("naics","year"), all=TRUE)
German.data[is.na(German.data)]<-0
# Import competition = imports / domestic production; left NA when production is zero.
German.data[,import.competition := ifelse(de.production.usd==0, NA, imports/de.production.usd)]
German.data[,W.import.competition := winsor(import.competition)]

# Merge with disclosed sales to get disclosure fraction
# Left join: industry-years without any disclosed sales are kept.
German.data <- merge(German.data,
                     DE.aggregated.sales, 
                     by.x=c("naics","year"), by.y=c("industry","year"), all.x=TRUE)
# NOTE(review): this fill covers every column of the table, not only the columns just merged in.
# It therefore also turns the NA written to import.competition above (zero-production rows) into 0,
# and likewise any NA remaining in W.import.competition. Confirm that this is intended.
German.data[is.na(German.data)] <- 0
# Disclosed and public sales as a fraction of production; NA when production is zero.
# The division by 1000 is presumably a unit conversion between the sales and production series - confirm.
German.data[,disclosure := ifelse(de.production.usd==0,NA,as.numeric(disc.sales)/1000/de.production.usd)]
German.data[,W.disclosure := winsor(disclosure)]
German.data[,public.sales.frac := ifelse(de.production.usd==0,NA,as.numeric(public.sale)/1000/de.production.usd)]
German.data[,W.public.sales.frac := winsor(public.sales.frac)]

# Lag disclosure 1 year
# shift() takes the previous row within each naics after sorting by year.
# NOTE(review): this equals a one-year lag only if every industry has consecutive years with no gaps - confirm.
setorder(German.data, naics, year)
German.data[, lag.W.disclosure        := shift(.SD), by=.(naics), .SDcols="W.disclosure"]
German.data[, lag.W.public.sales.frac := shift(.SD), by=.(naics), .SDcols="W.public.sales.frac"]
German.data[, lag.de.production.usd := shift(.SD), by=.(naics), .SDcols="de.production.usd"]
German.data[, lag2.de.production.usd := shift(.SD), by=.(naics), .SDcols="lag.de.production.usd"]
German.data[, lag2.W.disclosure := shift(.SD), by=.(naics), .SDcols='lag.W.disclosure']
German.data[, lag.W.import.competition := shift(.SD), by=.(naics), .SDcols='W.import.competition']
German.data[, lag2.W.public.sales.frac := shift(.SD), by=.(naics), .SDcols='lag.W.public.sales.frac']

# Merge in imports from other high-wealth countries
German.data <- merge(German.data, oth.hi.inc.imports.noDE, by.x=c("naics", "year"), by.y=c("naics", "t"), all.x=TRUE)


# German.data1: rows with a non-missing lagged disclosure.
German.data1 <- German.data[!is.na(lag.W.disclosure)]


# German.data2: adds versions of the three variables demeaned by industry (naics),
# and standardised (scale()) versions of those demeaned variables.
German.data2 <- cbind(German.data1, 
                      data.table(demeanlist(German.data1[,.(demeaned.lag.W.disclosure      = lag.W.disclosure,
                                                            demeaned.lag.W.public.sales.frac = lag.W.public.sales.frac,
                                                            demeaned.W.import.competition = W.import.competition)],
                                            list(factor(German.data1$naics))))) 
German.data2[, S.demeaned.lag.W.disclosure     := scale(demeaned.lag.W.disclosure)]
German.data2[, S.demeaned.lag.W.public.sales.frac := scale(demeaned.lag.W.public.sales.frac)]
German.data2[, S.demeaned.W.import.competition := scale(demeaned.W.import.competition)]

# German.data3: inner join with selected columns of combined.data.n4 on year and industry code;
# industry-years without a match in combined.data.n4 are dropped.
German.data3 <- merge(German.data2, combined.data.n4[,.(frac.related, imports.K, RCPTOT, YEAR, NAICS, 
                                                        S.demeaned.lag.W.alt2.public.presence, lag.W.alt2.public.presence)],
                      by.x=c("year", "naics"), by.y=c("YEAR", "NAICS"))
# German.data4: rows with non-missing frac.related, plus the industry-demeaned value of
# (1 - frac.related) * winsor(imports.K / RCPTOT).
German.data4 <- cbind(German.data3[!is.na(frac.related)], 
                      data.table(demeanlist(German.data3[!is.na(frac.related),.(demeaned.US.import.comp      = (1-frac.related)*winsor(imports.K/RCPTOT))],
                                            list(factor(German.data3[!is.na(frac.related),naics]))))) 
# German.data5: attaches each industry's 2008 and 2006 disclosure and public-sales fractions.
# Both merges are inner joins, so industries missing either year are dropped.
German.data5 <- merge(German.data4, merge(German.data4[year==2008, .(naics,disc.8 = disclosure, psfrac.8 = public.sales.frac)],
                                          German.data4[year==2006, .(naics,disc.6 = disclosure, psfrac.6 = public.sales.frac)],by='naics'),
                      by='naics')
# Treatment variables: winsorised 2006-to-2008 change (second argument .02 is passed to winsor()).
German.data5[,W.disc.treat := winsor(disc.8-disc.6,.02)]
German.data5[,W.ps.treat := winsor(psfrac.8-psfrac.6,.02)]

# Sorts German.data4 in place; German.data5 was built above and is not re-sorted by this call.
setorder(German.data4, naics, year)


##############################################################################################################
# Earnings Quality                                                                                           #
##############################################################################################################
# Load the earnings-quality measures from the SAS dataset.
earnings.qlty <- data.table(read.sas7bdat("C:/Users/omartian/Box/GO/ManufacturingDecline/Data/fs_eq_qlty.sas7bdat"))

# The four R-squared measures are converted to numeric by way of character.
rsquare.cols <- c("rsquare_volfull", "rsquare_absretfull", "rsquare_volrdq", "rsquare_absretrdq")
earnings.qlty[, (rsquare.cols) := lapply(.SD, function(v) as.numeric(as.character(v))),
              .SDcols = rsquare.cols]

# Industry code as character, and the fiscal year shifted forward by one so
# that each measure is matched to the following year of the industry panel.
earnings.qlty[, `:=`(naicsh = as.character(naicsh),
                     yearp1 = fyearq + 1)]

comb.data.eq <- merge(combined.data.n4, earnings.qlty,
                      by.x = c("NAICS", "YEAR"), by.y = c("naicsh", "yearp1"))


#########################
# Guidance and Analysts #
#########################

# Load the guidance records and the identifier file that accompanies them.
guidance <- fread('C:/Users/omartian/Box/GO/ManufacturingDecline/Data/Guidance/Guidance.csv')
guidance.ids <- fread('C:/Users/omartian/Box/GO/ManufacturingDecline/Data/Guidance/GuidanceIDs.csv')

# Export TBES Guidance tickers for JAR data replication policy
# NOTE(review): unlike the input paths, this output path has no "Box" segment - confirm the folder is intended.
fwrite(guidance.ids[,.(ticker)], "C:/Users/omartian/GO/Identifiers/GuidanceDataIdentifiers.csv")

# Analyst forecast files and the identifier tables used to link them to Compustat.
forecasts <- fread('C:/Users/omartian/Box/GO/ManufacturingDecline/Data/Guidance/Forecasts.csv')
forecasts_all <- fread('C:/Users/omartian/Box/GO/ManufacturingDecline/Data/IBES/ffbfca07a57fdbca.csv')
forecasts_all_s <- fread('C:/Users/omartian/Box/GO/ManufacturingDecline/Data/IBES/c2a86a194c1438ad.csv')
crsp_head <- fread('C:/Users/omartian/Box/GO/ManufacturingDecline/Data/Guidance/CRSP_header.csv')
gvkey_cusip <- fread('C:/Users/omartian/Box/GO/ManufacturingDecline/Data/Guidance/GVKEY_CUSIP.csv')
# Build the gvkey <-> CUSIP cross-reference: join the link table to the CRSP header on PERMNO,
# keep the distinct (gvkey, CUSIP) pairs, and store gvkey as integer.
linktab <- read_sas('C:/Users/omartian/Box/GO/ManufacturingDecline/Data/Guidance/ccmxpf_linktable.sas7bdat')
linktab <- merge(linktab,crsp_head,by.y="PERMNO", by.x='lpermno')
linktab <- data.table(linktab)
linktab <- unique(linktab[,.(gvkey,CUSIP)])
setnames(linktab,'CUSIP', 'cusip')
linktab[,gvkey:=as.integer(gvkey)]

# Ticker-to-CUSIP links with validity dates. A missing end date is replaced by 2025-01-01,
# so announcement years after 2025 would not match such a link in the range join further below.
id_guidance <- data.table(read_sas('C:/Users/omartian/Box/GO/ManufacturingDecline/Data/Guidance/id_guidance.sas7bdat'))
id_guidance[,ibes_tkr_edate := fifelse(is.na(ibes_tkr_edate), as.Date("2025-01-01", format="%Y-%m-%d"), ibes_tkr_edate)]
id_guidance_ext <- data.table(read_sas('C:/Users/omartian/Box/GO/ManufacturingDecline/Data/Guidance/id_guidance_ext.sas7bdat'))


# cross-reference of cusips and gvkeys by fiscal year
# Keeps distinct (gvkey, 8-character cusip) pairs with a non-empty cusip.
# NOTE(review): gvkey_cusip is not referenced again in this part of the script (linktab is used for the link) - confirm it is still needed.
gvkey_cusip <- unique(gvkey_cusip[cusip != '',.(gvkey,cusip=substr(cusip,1,8))])

# Guided vs. unguided public presence
# ------------------------------------
# The same pipeline is run for four guidance measures (EPS, sales, gross margin, capex):
#   1. keep US-firm guidance of the measure that was announced before the start of the period it targets,
#   2. map the ticker to a CUSIP whose link is valid in the announcement year, then to a gvkey,
#   3. flag Compustat firm-years with such guidance and split NAICS4 sales into guided / unguided,
#   4. regress import competition on lagged guided and unguided public presence and test
#      whether the two coefficients are equal.
# Steps 1-3 were previously repeated verbatim for each measure; they now live in build.guided.panel().
# Step 4 stays at top level so the stored felm calls still reference combined.data.n4.guided.

# Year bounds of each ticker-CUSIP link, used by the range join inside build.guided.panel().
id_guidance[,ibes_start := year(ibes_tkr_sdate)]
id_guidance[,ibes_end := year(ibes_tkr_edate)]

# Build the industry-year panel of guided / unguided public presence for one set of guidance measures.
#
# Args:
#   measure.codes: character vector of values of guidance$measure to keep (e.g. 'EPS').
#
# Reads the script-level objects guidance, id_guidance, linktab, compustat, seg.sales,
# combined.data.n4 and the helper winsor().
#
# Returns a named list holding every intermediate table under the name the script has always used
# for it: guidance.gt1yr, guidance.firms, guided.compustat, guided.compustat.merged.bs,
# guided.compustat.merge.bs.agg and combined.data.n4.guided.
build.guided.panel <- function(measure.codes) {
    # range.days = first day of the guided period minus the announcement date.
    # NOTE(review): the comment on the original code said "only pick guidance that's over a year out",
    # but the filter keeps every record with range.days > 0. Confirm the intended horizon.
    guidance.gt1yr <- guidance[measure %in% measure.codes & usfirm==1]
    guidance.gt1yr[,range.days := as.Date(paste0(as.character(prd_yr),
                                                 ifelse(prd_mon < 10, paste0('0',as.character(prd_mon)),as.character(prd_mon)),
                                                 "01"), format="%Y%m%d")
                   - as.Date(as.character(anndats), format="%Y%m%d")]
    guidance.gt1yr <- guidance.gt1yr[range.days > 0]

    # Link ticker to cusip: range join on the announcement year falling inside the link's
    # validity window. The join overwrites ibes_start / ibes_end with the joined value, hence
    # the duplicate annyear_dup column that carries the announcement year through.
    guidance.firms <- id_guidance[guidance.gt1yr[,.N,keyby=.(ticker,
                                                             annyear     = as.integer(substr(as.character(anndats),1,4)),
                                                             annyear_dup = as.integer(substr(as.character(anndats),1,4)))],
                                  on=.(ticker, ibes_start <= annyear, ibes_end >= annyear)]
    guidance.firms <- unique(guidance.firms[,.(ticker,cusip,annyear=annyear_dup)])

    # Link gvkey by cusip (left join: tickers without a gvkey stay in guidance.firms with NA gvkey).
    guidance.firms <- merge(guidance.firms, linktab, by='cusip', all.x=TRUE)

    # Fix: guidance.firms is unique per (ticker, cusip, annyear), not per (gvkey, annyear). A gvkey
    # reached through several tickers/cusips in one year used to duplicate its Compustat row in
    # the left merge below and double count its sales. Merge only distinct, non-missing firm-years.
    guided.firm.years <- unique(guidance.firms[!is.na(gvkey), .(gvkey, annyear=as.integer(annyear), guided=1)])
    guided.compustat <- merge(compustat, guided.firm.years,
                              by.x=c('gvkey','fyear'), by.y=c('gvkey','annyear'), all.x=TRUE)
    guided.compustat[,guided := ifelse(is.na(guided),0,1)]

    # Attach 4-digit segment data; where a firm-year has none, fall back to the firm-level
    # industry (best.n4) and sales.
    guided.compustat.merged.bs <- merge(guided.compustat, seg.sales[!is.na(seg.n4) & nchar(seg.n4)==4], by=c("gvkey","fyear"),all.x=TRUE)[loc=="USA" & curcd=="USD",.(gvkey,fyear,best.n4,seg.n4,sale,seg.sales,guided)]
    guided.compustat.merged.bs[,best.n4.incseg := ifelse(is.na(seg.n4),best.n4,seg.n4)]
    guided.compustat.merged.bs[,best.n4.incseg.sales := ifelse(is.na(seg.sales),sale,seg.sales)]
    guided.compustat.merged.bs <- guided.compustat.merged.bs[!is.na(best.n4.incseg)]
    guided.compustat.merged.bs <- guided.compustat.merged.bs[nchar(best.n4.incseg)==4 & best.n4.incseg > 3100 & best.n4.incseg < 3400]

    # Firm counts and sales (multiplied by 1000) by year and industry, split by guidance status.
    guided.compustat.merge.bs.agg <- guided.compustat.merged.bs[,.(BSEG.FIRMS.GUIDED     = sum(guided),
                                                                   BSEG.SALE.GUIDED      = 1000 * sum(best.n4.incseg.sales*guided, na.rm=TRUE),
                                                                   BSEG.FIRMS.UNGUIDED   = sum(ifelse(guided==1,0,1)),
                                                                   BSEG.SALE.UNGUIDED    = 1000 * sum(best.n4.incseg.sales*ifelse(guided==1,0,1), na.rm=TRUE)),
                                                                keyby=.(fyear, best.n4.incseg)]
    setnames(guided.compustat.merge.bs.agg, "fyear", "YEAR")
    setnames(guided.compustat.merge.bs.agg, "best.n4.incseg", "best.n4")

    # Guided / unguided sales as a share of industry receipts (NA when RCPTOT is zero),
    # winsorised and lagged one row within industry.
    combined.data.n4.guided <- merge(combined.data.n4, guided.compustat.merge.bs.agg, all.x=TRUE, by.x=c("YEAR", "NAICS"), by.y=c("YEAR", "best.n4"))
    combined.data.n4.guided[,guided.alt2.public.presence := ifelse(RCPTOT==0, NA, BSEG.SALE.GUIDED/(as.numeric(RCPTOT)))]
    combined.data.n4.guided[,W.guided.alt2.public.presence := winsor(guided.alt2.public.presence)]
    combined.data.n4.guided[,unguided.alt2.public.presence := ifelse(RCPTOT==0, NA, BSEG.SALE.UNGUIDED/(as.numeric(RCPTOT)))]
    combined.data.n4.guided[,W.unguided.alt2.public.presence := winsor(unguided.alt2.public.presence)]

    setorder(combined.data.n4.guided, NAICS, YEAR)
    combined.data.n4.guided[, lag.W.guided.alt2.public.presence  := shift(.SD), by=NAICS, .SDcols="W.guided.alt2.public.presence"]
    combined.data.n4.guided[, lag.W.unguided.alt2.public.presence  := shift(.SD), by=NAICS, .SDcols="W.unguided.alt2.public.presence"]

    list(guidance.gt1yr                = guidance.gt1yr,
         guidance.firms                = guidance.firms,
         guided.compustat              = guided.compustat,
         guided.compustat.merged.bs    = guided.compustat.merged.bs,
         guided.compustat.merge.bs.agg = guided.compustat.merge.bs.agg,
         combined.data.n4.guided       = combined.data.n4.guided)
}

# Each regression below has YEAR and NAICS fixed effects, no instruments, and standard errors
# clustered by YEAR and NAICS. linearHypothesis() tests equality of the guided and unguided coefficients.

####################
# Earnings Guidance
guided.build <- build.guided.panel(c('EPS')) #,'NET', 'EBT', 'OPR', 'PRE', 'GPS')
combined.data.n4.guided <- guided.build$combined.data.n4.guided

sa2.c.guided.earn.ctrls.rp     <- felm(I((1 - frac.related) * winsor((imports.K/RCPTOT)))
                                       ~ lag.W.guided.alt2.public.presence + lag.W.unguided.alt2.public.presence +
                                           lag.ave.duty.rate.overall + post_ntr_ns + lag.VALADD.perc + lag.PAYANN.perc + lag.WAGERATE + lag.pct20 + lag.RCPTOT.growth + I(aggregate.v/1000000000)
                                       | YEAR + NAICS | 0 |YEAR + NAICS, data=combined.data.n4.guided)
summary(sa2.c.guided.earn.ctrls.rp)
lh.guid.earn <- linearHypothesis(sa2.c.guided.earn.ctrls.rp, 'lag.W.guided.alt2.public.presence = lag.W.unguided.alt2.public.presence')
lh.guid.earn


#################
# Sales Guidance
guided.build <- build.guided.panel(c('SAL'))
combined.data.n4.guided <- guided.build$combined.data.n4.guided

sa2.c.guided.sales.ctrls.rp     <- felm(I((1 - frac.related) * winsor((imports.K/RCPTOT)))
                                        ~ lag.W.guided.alt2.public.presence + lag.W.unguided.alt2.public.presence +
                                            lag.ave.duty.rate.overall + post_ntr_ns + lag.VALADD.perc + lag.PAYANN.perc + lag.WAGERATE + lag.pct20 + lag.RCPTOT.growth + I(aggregate.v/1000000000)
                                        | YEAR + NAICS | 0 |YEAR + NAICS, data=combined.data.n4.guided)
summary(sa2.c.guided.sales.ctrls.rp)
lh.guid.sales <- linearHypothesis(sa2.c.guided.sales.ctrls.rp, 'lag.W.guided.alt2.public.presence = lag.W.unguided.alt2.public.presence')
lh.guid.sales

########################
# Gross Margin Guidance
guided.build <- build.guided.panel(c('GRM'))
combined.data.n4.guided <- guided.build$combined.data.n4.guided

sa2.c.guided.gm.ctrls.rp     <- felm(I((1 - frac.related) * winsor((imports.K/RCPTOT)))
                                     ~ lag.W.guided.alt2.public.presence + lag.W.unguided.alt2.public.presence +
                                         lag.ave.duty.rate.overall + post_ntr_ns + lag.VALADD.perc + lag.PAYANN.perc + lag.WAGERATE + lag.pct20 + lag.RCPTOT.growth + I(aggregate.v/1000000000)
                                     | YEAR + NAICS | 0 |YEAR + NAICS, data=combined.data.n4.guided)
summary(sa2.c.guided.gm.ctrls.rp)
lh.guid.gm <- linearHypothesis(sa2.c.guided.gm.ctrls.rp, 'lag.W.guided.alt2.public.presence = lag.W.unguided.alt2.public.presence')
lh.guid.gm

#################
# CapEx Guidance
guided.build <- build.guided.panel(c('CPX'))
combined.data.n4.guided <- guided.build$combined.data.n4.guided

sa2.c.guided.cpx.ctrls.rp     <- felm(I((1 - frac.related) * winsor((imports.K/RCPTOT)))
                                      ~ lag.W.guided.alt2.public.presence + lag.W.unguided.alt2.public.presence +
                                          lag.ave.duty.rate.overall + post_ntr_ns + lag.VALADD.perc + lag.PAYANN.perc + lag.WAGERATE + lag.pct20 + lag.RCPTOT.growth + I(aggregate.v/1000000000)
                                      | YEAR + NAICS | 0 |YEAR + NAICS, data=combined.data.n4.guided)
summary(sa2.c.guided.cpx.ctrls.rp)
lh.guid.cpx <- linearHypothesis(sa2.c.guided.cpx.ctrls.rp, 'lag.W.guided.alt2.public.presence = lag.W.unguided.alt2.public.presence')
lh.guid.cpx

# Leave the intermediate tables of the last (CapEx) run in the workspace under their usual names,
# as the script did when these steps were written out inline.
guidance.gt1yr                <- guided.build$guidance.gt1yr
guidance.firms                <- guided.build$guidance.firms
guided.compustat              <- guided.build$guided.compustat
guided.compustat.merged.bs    <- guided.build$guided.compustat.merged.bs
guided.compustat.merge.bs.agg <- guided.build$guided.compustat.merge.bs.agg



### Analysts
# Largest number of estimates (NUMEST) per CUSIP, measure, forecast-period indicator and year,
# where year is the first four characters of STATPERS.
analyst.firms <- forecasts_all_s[,.(max.analysts=max(NUMEST)),by=.(CUSIP,MEASURE,FPI,year=substr(STATPERS,1,4))]

# Export Analyst sample Identifiers for JAR data replication policy
fwrite(analyst.firms[,.(CUSIP,MEASURE,FPI,year)], "C:/Users/omartian/GO/Identifiers/AnalystIdentifiers.csv")


# One row per CUSIP-year, one column per MEASURE_FPI combination (e.g. EPS_1).
analyst.firms <- dcast(analyst.firms[CUSIP != ""], CUSIP + year ~ MEASURE + FPI, value.var = 'max.analysts')
# Remember the analyst-count columns by name so they can be zero-filled after the Compustat merge.
analyst.count.cols <- setdiff(names(analyst.firms), c("CUSIP", "year"))
analyst.firms <- merge(analyst.firms, linktab, by.x='CUSIP', by.y='cusip', all.x=TRUE)
analyst.firms[,year := as.integer(year)]
# NOTE(review): a gvkey linked to several CUSIPs in one year appears more than once in analyst.firms,
# which duplicates that Compustat firm-year here and inflates BSEG.FIRMS / BSEG.SALE below.
# Left as is because the right way to combine the counts (max vs. sum) is not evident - confirm.
analyst.compustat <- merge(compustat[fyear>=1996], analyst.firms[!is.na(gvkey)],by.x=c('gvkey','fyear'),by.y=c('gvkey','year'),all.x=TRUE)

# Firm-years without analyst coverage get 0 analysts. Fix: the fill used to address columns 31:50
# by position, which silently targets the wrong columns whenever the width of compustat or the
# set of MEASURE/FPI combinations changes. The count columns are now addressed by name.
stopifnot(all(analyst.count.cols %in% names(analyst.compustat)))
for (count.col in analyst.count.cols) {
    set(analyst.compustat,
        i = which(is.na(analyst.compustat[[count.col]])),
        j = count.col,
        value = 0)
}

# Attach 4-digit segment data; fall back to the firm-level industry and sales where there is none.
analyst.compustat <- merge(analyst.compustat, seg.sales[!is.na(seg.n4) & nchar(seg.n4)==4], by=c("gvkey","fyear"),all.x=TRUE)[loc=="USA" & curcd=="USD"]

analyst.compustat[,best.n4.incseg := ifelse(is.na(seg.n4),best.n4,seg.n4)]
analyst.compustat[,best.n4.incseg.sales := ifelse(is.na(seg.sales),sale,seg.sales)]
analyst.compustat <- analyst.compustat[!is.na(best.n4.incseg)]
analyst.compustat <- analyst.compustat[nchar(best.n4.incseg)==4 & best.n4.incseg > 3100 & best.n4.incseg < 3400]
# Aggregate to year x industry: number of rows, sales (multiplied by 1000) and summed analyst counts.
analyst.compustat <- analyst.compustat[,.(BSEG.FIRMS = .N,
                                          BSEG.SALE = 1000 * sum(best.n4.incseg.sales,na.rm = TRUE),
                                          ANALYSTS.CPX1 = sum(CPX_1),
                                          ANALYSTS.CPX2 = sum(CPX_2),
                                          ANALYSTS.CPX3 = sum(CPX_3),
                                          ANALYSTS.CPX4 = sum(CPX_4),
                                          ANALYSTS.CPX5 = sum(CPX_5),
                                          ANALYSTS.EPS1 = sum(EPS_1),
                                          ANALYSTS.EPS2 = sum(EPS_2),
                                          ANALYSTS.EPS3 = sum(EPS_3),
                                          ANALYSTS.EPS4 = sum(EPS_4),
                                          ANALYSTS.EPS5 = sum(EPS_5),
                                          ANALYSTS.SAL1 = sum(SAL_1),
                                          ANALYSTS.SAL2 = sum(SAL_2),
                                          ANALYSTS.SAL3 = sum(SAL_3),
                                          ANALYSTS.SAL4 = sum(SAL_4),
                                          ANALYSTS.SAL5 = sum(SAL_5),
                                          ANALYSTS.GRM1 = sum(GRM_1),
                                          ANALYSTS.GRM2 = sum(GRM_2),
                                          ANALYSTS.GRM3 = sum(GRM_3),
                                          ANALYSTS.GRM4 = sum(GRM_4),
                                          ANALYSTS.GRM5 = sum(GRM_5)),
                                       keyby=.(fyear, best.n4.incseg)]
setnames(analyst.compustat, "fyear", "YEAR")
setnames(analyst.compustat, "best.n4.incseg", "best.n4")
combined.data.n4.followed <- merge(combined.data.n4, analyst.compustat , all.x=TRUE, by.x=c("YEAR", "NAICS"), by.y=c("YEAR", "best.n4"))
setorder(combined.data.n4.followed, NAICS, YEAR)
combined.data.n4.followed[,lag.W.alt2.public.presence  := shift(.SD), by=NAICS, .SDcols="W.alt2.public.presence"]

# One-row lags within industry. The ".y" columns are the analyst.compustat versions of
# BSEG.FIRMS / BSEG.SALE, suffixed by the merge above.
lag.source.cols <- c("BSEG.FIRMS.y", "BSEG.SALE.y",
                     "ANALYSTS.EPS1", "ANALYSTS.EPS2", "ANALYSTS.EPS3", "ANALYSTS.EPS4", "ANALYSTS.EPS5",
                     "ANALYSTS.SAL1", "ANALYSTS.GRM1", "ANALYSTS.CPX1")
lag.target.cols <- paste0("lag.", lag.source.cols)
combined.data.n4.followed[, (lag.target.cols) := shift(.SD), by=NAICS, .SDcols=lag.source.cols]


####################
# UK Falsification #
####################
# Load the Orbis key financials for GB firms. Only the listed column positions are read,
# and col.names renames them in that order.
GB.orbis.financials <- fread("C:/Users/omartian/Box/GO/ManufacturingDecline/Data/Orbis/GB_key_financials-USD.txt", 
                             select=c(1,2,3,4,5,8,10,11,12,14,16,24,25),
                             col.names=c('BvD.id', 'consol.code', 'filing.type', 'date', 'months', 'source', 'original.currency', 'exchange.rate', 'sales', 'ni', 'ta', 'emps', 'market.cap'))
# Descriptor columns become factors; the yyyymmdd date becomes a Date.
GB.orbis.financials[,consol.code      := factor(consol.code)]
GB.orbis.financials[,filing.type      := factor(filing.type)]
GB.orbis.financials[,source           := factor(source)]
GB.orbis.financials[,original.currency := factor(original.currency)]
GB.orbis.financials[,date := as.Date(as.character(date), format='%Y%m%d')]
# Keep 12-month reporting periods only; the months column is then no longer needed.
GB.orbis.financials <- GB.orbis.financials[months==12]
GB.orbis.financials[,months := NULL]

# Cache the processed table; later runs can start from the readRDS() line.
saveRDS(GB.orbis.financials, "C:/Users/omartian/Box/GO/ManufacturingDecline/Data/Orbis/GB_financials_processed.RDS")
GB.orbis.financials <- readRDS("C:/Users/omartian/Box/GO/ManufacturingDecline/Data/Orbis/GB_financials_processed.RDS")


# Export UK Identifiers for JAR data replication policy
fwrite(GB.orbis.financials[,.(BvD.id,consol.code,filing.type,date)], "C:/Users/omartian/GO/Identifiers/UKIdentifiers.csv")

# Remove double counting of consolidated records: per firm and date, keep the record with the
# largest sales.
# Fix: setorder() defaults to na.last=FALSE, which places missing sales FIRST even for a
# descending sort, so .SD[1] used to keep a record with NA sales over one with reported sales.
# na.last=TRUE sends missing sales to the end of each firm-date group.
setorder(GB.orbis.financials, BvD.id, date, -sales, na.last=TRUE)
GB.orbis.financials.nodc <- GB.orbis.financials[,.SD[1],by=.(BvD.id,date)]


# Read the industry classifications of the GB firms and reshape to long form:
# one row per firm x classification field x code, with missing codes dropped.
GB.orbis.industries <- fread(
    "C:/Users/omartian/Box/GO/ManufacturingDecline/Data/Orbis/british_industry_classifications.txt",
    select = c(1, 8, 10, 12, 14, 16, 18)
)
setnames(GB.orbis.industries,
         c("BvD.id", "NACE4.core", "NACE4.primary", "NACE4.secondary",
           "NAICS4.core", "NAICS6.primary", "NAICS6.secondary"))
GB.orbis.industries <- melt(GB.orbis.industries, "BvD.id", na.rm = TRUE)

# Create separate datasets for each code.
# Core classifications: firm and code only.
GB.orbis.NACE4.core  <- GB.orbis.industries[variable == "NACE4.core",  .(BvD.id, industry = value)]
GB.orbis.NAICS4.core <- GB.orbis.industries[variable == "NAICS4.core", .(BvD.id, industry = value)]

# Primary and secondary classifications: firm, code, and N = how many codes of
# that kind the firm lists (used later to apportion sales across them).
GB.orbis.NACE4.primary <- merge(
    GB.orbis.industries[variable == "NACE4.primary", .(BvD.id, industry = value)],
    GB.orbis.industries[variable == "NACE4.primary", .N, keyby = BvD.id],
    by = "BvD.id")
GB.orbis.NACE4.secondary <- merge(
    GB.orbis.industries[variable == "NACE4.secondary", .(BvD.id, industry = value)],
    GB.orbis.industries[variable == "NACE4.secondary", .N, keyby = BvD.id],
    by = "BvD.id")
GB.orbis.NAICS6.primary <- merge(
    GB.orbis.industries[variable == "NAICS6.primary", .(BvD.id, industry = value)],
    GB.orbis.industries[variable == "NAICS6.primary", .N, keyby = BvD.id],
    by = "BvD.id")
GB.orbis.NAICS6.secondary <- merge(
    GB.orbis.industries[variable == "NAICS6.secondary", .(BvD.id, industry = value)],
    GB.orbis.industries[variable == "NAICS6.secondary", .N, keyby = BvD.id],
    by = "BvD.id")

# The long table is no longer needed.
rm(GB.orbis.industries)

# Attach every primary NAICS6 code to each de-duplicated firm record, keeping
# only codes in the 310000-339999 range.
GB.orbis.financials.with.n6 <- merge(GB.orbis.financials.nodc, GB.orbis.NAICS6.primary, by = "BvD.id")
GB.orbis.financials.with.n6 <- GB.orbis.financials.with.n6[industry >= 310000 & industry <= 339999]
# Split a firm's sales evenly across its N primary codes, then collapse the
# 6-digit codes to their 4-digit prefix and total the apportioned sales per
# firm record and 4-digit industry.
GB.orbis.financials.with.n6[, apportioned.sales := sales / N]
GB.orbis.financials.with.n6[, industry := substr(industry, 1, 4)]
GB.orbis.financials.with.n6 <- GB.orbis.financials.with.n6[
    , .(naics4.sales = sum(apportioned.sales)),
    keyby = .(BvD.id, consol.code, filing.type, date, source, original.currency,
              exchange.rate, sales, ni, ta, emps, market.cap, industry)]
GB.orbis.financials.with.n6[, year := year(date)]

# Industry-year totals over all records with non-missing sales (disc.*) and
# over the subset that also has a non-missing market.cap (public.*). The left
# join keeps industry-years that have no such subset.
GB.aggregated.sales <- merge(
    GB.orbis.financials.with.n6[!is.na(naics4.sales),
                                .(disc.firms = .N, disc.sales = sum(naics4.sales)),
                                keyby = .(industry, year)],
    GB.orbis.financials.with.n6[!is.na(naics4.sales) & !is.na(market.cap),
                                .(public.firms = .N, public.sale = sum(naics4.sales)),
                                keyby = .(industry, year)],
    by = c("industry", "year"), all.x = TRUE)
# Cache the aggregate on disk, then reload it from that cache.
saveRDS(GB.aggregated.sales, "C:/Users/omartian/Box/GO/ManufacturingDecline/Data/Orbis/GB_disclosure.RDS")
GB.aggregated.sales <- readRDS("C:/Users/omartian/Box/GO/ManufacturingDecline/Data/Orbis/GB_disclosure.RDS")



#==========================================================================================
# Process BACI data for UK Import Competition
#==========================================================================================
# Imports: BACI rows whose j.iso2 code is "GB", with value (tot.v) and
# quantity (tot.q) totalled by naics and t.
UK.imports <- baci.data[j.iso2 == "GB"]
UK.imports.aggregated <- UK.imports[, list(v = sum(tot.v), q = sum(tot.q)),
                                    keyby = c("naics", "t")]


# Load UK Production
# Left-pad codes with "0" up to `width` characters.
# sprintf("%08s", x) is not used because the "0" flag on %s conversions is
# platform-dependent in R (zero-pads on some platforms, ignored on others),
# which would make the derived NACE prefix differ across machines.
#   code  : vector of codes (coerced to character)
#   width : target number of characters; longer codes are returned unchanged
# Returns a character vector the same length as `code`; NA stays NA.
gb.prod.pad.code <- function(code, width = 8L) {
    code <- as.character(code)
    short.by <- pmax(0L, width - nchar(code))
    ifelse(is.na(code), NA_character_, paste0(strrep("0", short.by), code))
}

# Shared loader for one Eurostat snapshot workbook.
#   path   : workbook path; the "Value" sheet is read
#   gb.col : position of the column to use as gb.production.eur (presumably the
#            GB column, whose position differs across snapshot vintages)
#   yr     : year stamped on every output row
# Returns a data.table keyed by nace with columns nace, gb.production.eur
# (summed within the 4-character prefix of the zero-padded code) and year.
gb.prod.load <- function(path, gb.col, yr) {
    europe.prod <- data.table(read_excel(path, sheet = "Value"))
    europe.prod <- europe.prod[, c(1, gb.col), with = FALSE]
    setnames(europe.prod, c("prodcom.code", "gb.production.eur"))
    # Drop the first two rows (presumably header rows) and rows without a code
    europe.prod <- europe.prod[-(1:2)]
    europe.prod <- europe.prod[!is.na(prodcom.code)]
    europe.prod[, nace := substr(gb.prod.pad.code(prodcom.code), 1, 4)]
    # Entries that do not parse as numbers become NA and are counted as zero
    europe.prod[, gb.production.eur := as.numeric(gb.production.eur)]
    europe.prod[is.na(gb.production.eur), gb.production.eur := 0]
    europe.prod[, .(gb.production.eur = sum(gb.production.eur), year = yr), keyby = nace]
}

# Snapshots for 2013-2018: value taken from column 32
gb.prod.13.18 <- function(yr) {
    gb.prod.load(paste0("C:/Users/omartian/Box/Go/ManufacturingDecline/Data/Eurostat/Website_snapshot_", yr, "_N2.xlsx"), 32, yr)
}
# Snapshots for 2006-2012: value taken from column 38
gb.prod.06.12 <- function(yr) {
    gb.prod.load(paste0("C:/Users/omartian/Box/Go/ManufacturingDecline/Data/Eurostat/Website_snapshot_", yr, "_N2.xlsx"), 38, yr)
}
# Snapshots for 2003-2005: value taken from column 41
gb.prod.03.05 <- function(yr) {
    gb.prod.load(paste0("C:/Users/omartian/Box/Go/ManufacturingDecline/Data/Eurostat/Website_snapshot_", yr, "_N2.xlsx"), 41, yr)
}
# Snapshots for 1995-2002: different file-name pattern, value taken from column 32
gb.prod.95.02 <- function(yr) {
    gb.prod.load(paste0("C:/Users/omartian/Box/Go/ManufacturingDecline/Data/Eurostat/Website-snapshot-", yr, "-created-2009-11-09-N2.xlsx"), 32, yr)
}

# Stack the per-year production tables from every snapshot vintage.
gb.production <- rbindlist(c(lapply(2013:2018, gb.prod.13.18),
                             lapply(2006:2012, gb.prod.06.12),
                             lapply(2003:2005, gb.prod.03.05),
                             lapply(1995:2002, gb.prod.95.02)))


# Convert NACE4 to NAICS4 to match up with disclosure and imports.
# Columns 1 and 3 (the two code columns) are read as character.
naics.nace.concordance <- fread("C:/Users/omartian/Box/GO/ManufacturingDecline/Data/Concordances/NACE_REV2-US_NAICS_2012.csv",
                                colClasses = list('character' = c(1, 3)))
setnames(naics.nace.concordance, c('nace.r2', 'nace.desc', 'naics.12', 'naics.desc', 'notes'))
# Strip the dots from the NACE code and take the 4-character NAICS prefix.
naics.nace.concordance[, `:=`(nace.r2 = gsub(".", "", nace.r2, fixed = TRUE),
                              naics4  = substr(naics.12, 1, 4))]
naics.nace.concordance <- unique(naics.nace.concordance[, .(nace.r2, naics4)])
# num.matches = number of distinct NAICS4 codes each NACE code maps to.
nace.conversion <- merge(naics.nace.concordance,
                         naics.nace.concordance[, .(num.matches = .N), keyby = nace.r2],
                         by = "nace.r2")

# Spread each NACE total evenly over its matching NAICS4 codes, total by
# (year, naics4), and convert to USD with the yearly exchange rate.
gb.production.naics <- merge(gb.production, nace.conversion,
                             by.x = 'nace', by.y = 'nace.r2', allow.cartesian = TRUE)
gb.production.naics[, gb.production.eur := gb.production.eur / num.matches]
gb.production.naics <- gb.production.naics[, .(gb.production.eur = sum(gb.production.eur)),
                                           keyby = .(year, naics4)]
gb.production.naics <- merge(gb.production.naics,
                             euro.usd.exchange.rate[, .(year, exchange.rate)],
                             by = "year")[, gb.production.usd := gb.production.eur * exchange.rate]


#==========================================================================================
# Create the dataset to run the analyses
#==========================================================================================
# Merge imports with production. all=TRUE keeps industry-years present in only
# one of the two sources; their missing imports/production are set to 0.
UK.data <- merge(UK.imports.aggregated[,.(naics,year=t,imports=v)],
                 gb.production.naics[naics4 > 3100 & naics4 < 3400,.(naics=naics4,year,gb.production.usd)],
                 by=c("naics","year"), all=TRUE)
UK.data[is.na(UK.data)]<-0

# Merge with disclosed sales. Industry-years without any disclosed sales get
# zeros in the disclosure columns.
UK.data <- merge(UK.data,
                 GB.aggregated.sales, 
                 by.x=c("naics","year"), by.y=c("industry","year"), all.x=TRUE)
UK.data[is.na(UK.data)] <- 0

# All ratios are computed only AFTER the blanket zero-fills above. Previously
# import.competition was created before the second fill, which silently turned
# its deliberate NA (zero production) into an import competition of 0.
UK.data[,import.competition := ifelse(gb.production.usd==0, NA, imports/gb.production.usd)]
UK.data[,W.import.competition := winsor(import.competition)]
UK.data[,disclosure := ifelse(gb.production.usd==0,NA,as.numeric(disc.sales)/1000/gb.production.usd)]
UK.data[,W.disclosure := winsor(disclosure)]
UK.data[,public.sales.frac := ifelse(gb.production.usd==0,NA,as.numeric(public.sale)/1000/gb.production.usd)]
UK.data[,W.public.sales.frac := winsor(public.sales.frac)]

# Lag disclosure 1 year (previous row within naics after sorting by year)
setorder(UK.data, naics, year)
UK.data[, lag.W.disclosure        := shift(.SD), by=.(naics), .SDcols="W.disclosure"]
UK.data[, lag.W.public.sales.frac := shift(.SD), by=.(naics), .SDcols="W.public.sales.frac"]
UK.data[, lag.gb.production.usd := shift(.SD), by=.(naics), .SDcols="gb.production.usd"]
UK.data[, lag2.gb.production.usd := shift(.SD), by=.(naics), .SDcols="lag.gb.production.usd"]

# Estimation sample: lagged disclosure and current import competition must both
# be observed. The second condition drops zero-production industry-years, whose
# import competition is undefined, before demeaning so that no NA reaches
# demeanlist().
UK.data1 <- UK.data[!is.na(lag.W.disclosure) & !is.na(W.import.competition)]
# Demean within industry, then standardize the demeaned variables
UK.data2 <- cbind(UK.data1, 
                  data.table(demeanlist(UK.data1[,.(demeaned.lag.W.disclosure      = lag.W.disclosure,
                                                    demeaned.lag.W.public.sales.frac = lag.W.public.sales.frac,
                                                    demeaned.W.import.competition = W.import.competition)],
                                        list(factor(UK.data1$naics))))) 
UK.data2[, S.demeaned.lag.W.disclosure     := scale(demeaned.lag.W.disclosure)]
UK.data2[, S.demeaned.lag.W.public.sales.frac := scale(demeaned.lag.W.public.sales.frac)]
UK.data2[, S.demeaned.W.import.competition := scale(demeaned.W.import.competition)]

# Attach the US industry-year variables and build the industry-demeaned US
# import competition measure on rows where frac.related is observed
UK.data3 <- merge(UK.data2, combined.data.n4[,.(frac.related, imports.K, RCPTOT, YEAR, NAICS, 
                                                S.demeaned.lag.W.alt2.public.presence, lag.W.alt2.public.presence)],
                  by.x=c("year", "naics"), by.y=c("YEAR", "NAICS"))
UK.data4 <- cbind(UK.data3[!is.na(frac.related)], 
                  data.table(demeanlist(UK.data3[!is.na(frac.related),.(demeaned.US.import.comp      = (1-frac.related)*winsor(imports.K/RCPTOT))],
                                        list(factor(UK.data3[!is.na(frac.related),naics]))))) 






###########
# Table 2 #
###########
# Determinants of public presence. Four specifications on combined.data.n4,
# all with standard errors clustered by YEAR and NAICS:
#   determ        : year fixed effects
#   determ.ec     : year fixed effects, adds CEXTOT.perc and INVTOTE.perc
#   determ.ife    : year and industry fixed effects
#   determ.ife.ec : year and industry fixed effects, adds CEXTOT.perc and INVTOTE.perc
determ     <- felm(scale(W.alt2.public.presence) ~ scale(ave.duty.rate.overall) + S.s1999mean + ifelse(YEAR > 2001,S.s1999mean, 0) + scale(VALADD.perc) + scale(PAYANN.perc) + 
                       scale(WAGERATE) + scale(pct20) + scale(RCPTOT.growth) + scale(aggregate.v/1000000000)                                
                   | YEAR | 0 |YEAR + NAICS, data=combined.data.n4)
determ.ec     <- felm(scale(W.alt2.public.presence) ~ scale(ave.duty.rate.overall) + S.s1999mean + scale(VALADD.perc) + scale(PAYANN.perc) + 
                          scale(WAGERATE) + scale(pct20) + scale(RCPTOT.growth) + scale(aggregate.v/1000000000)  +  scale(CEXTOT.perc) + scale(INVTOTE.perc) 
                      | YEAR | 0 |YEAR + NAICS, data=combined.data.n4)
determ.ife <- felm(scale(W.alt2.public.presence) ~ scale(ave.duty.rate.overall) + ifelse(YEAR > 2001,S.s1999mean, 0) + scale(VALADD.perc) + scale(PAYANN.perc) + 
                       scale(WAGERATE) + scale(pct20) + scale(RCPTOT.growth) + scale(aggregate.v/1000000000)                                    
                   | YEAR + NAICS | 0 |YEAR + NAICS, data=combined.data.n4)
determ.ife.ec <- felm(scale(W.alt2.public.presence) ~ scale(ave.duty.rate.overall) + scale(VALADD.perc) + scale(PAYANN.perc) + 
                          scale(WAGERATE) + scale(pct20) + scale(RCPTOT.growth) + scale(aggregate.v/1000000000)  +  scale(CEXTOT.perc) + scale(INVTOTE.perc)                                
                      | YEAR + NAICS | 0 |YEAR + NAICS, data=combined.data.n4)

# covariate.labels follow the order in which covariates first appear across
# the four models. The NTRGap label previously lacked its closing "$".
stargazer(determ, determ.ec, determ.ife, determ.ife.ec, type='text', style="qje", #out="C:/Users/omartian/Dropbox/GO Census/Tables/JAR/Revision2/Determ.tex", 
          float=FALSE, keep.stat=c("n", "adj.rsq"), table.layout = "cd#-t-a", 
          dep.var.labels = c("$\\text{PublicPresence}_{i,t}$"),
          covariate.labels = c("$\\text{Tariff}_{i,t}$",
                               "$\\text{NTRGap}_{i}$",
                               "$\\text{NTRGap}_{i} \\times \\text{Post2001}_{t}$",
                               "$\\text{ValueAdd}_{i,t}$",
                               "$\\text{Payroll}_{i,t}$",
                               "$\\text{WageRate}_{i,t}$",
                               "$\\text{Concentration}_{i,t}$",  
                               "$\\text{IndustryGrowth}_{i,t}$",
                               "$\\text{ImportsOHIC}_{i,t}$",  
                               "$\\text{CapEx}_{i,t}$",
                               "$\\text{TotalInv}_{i,t}$"),
          add.lines=list("\\textit{Fixed Effects:}", 
                         c("Industry ($i$)",                                   rep("\\multicolumn{1}{c}{No}", 2),rep("\\multicolumn{1}{c}{Yes}", 2)),
                         c("Year ($t$)",                                       rep("\\multicolumn{1}{c}{Yes}", 4)),
                         "",
                         "\\textit{Number of Clusters:}",
                         c("Industry", 
                           length(unique(determ$clustervar$NAICS)),
                           length(unique(determ.ec$clustervar$NAICS)),
                           length(unique(determ.ife$clustervar$NAICS)),
                           length(unique(determ.ife.ec$clustervar$NAICS))),
                         c("Year", 
                           length(unique(determ$clustervar$YEAR)),
                           length(unique(determ.ec$clustervar$YEAR)),
                           length(unique(determ.ife$clustervar$YEAR)),
                           length(unique(determ.ife.ec$clustervar$YEAR))),
                         "",
                         c("$N$", determ$N,
                           determ.ec$N,
                           determ.ife$N,
                           determ.ife.ec$N),
                         c("$R^2$ Full Model", round(summary(determ)$r.squared,3),
                           round(summary(determ.ec)$r.squared,3),
                           round(summary(determ.ife)$r.squared,3),
                           round(summary(determ.ife.ec)$r.squared,3)),
                         c("$R^2$ Projected Model", round(summary(determ)$P.r.squared,3),
                           round(summary(determ.ec)$P.r.squared,3),
                           round(summary(determ.ife)$P.r.squared,3),
                           round(summary(determ.ife.ec)$P.r.squared,3))))


####################
# Table 3, Panel A #
####################
# Main specifications: (1 - frac.related) * winsorized imports.K/RCPTOT on
# lagged, demeaned, standardized public presence, with year and industry fixed
# effects and standard errors clustered by YEAR and NAICS.
#   sa2.c.main.rp           : no controls
#   sa2.c.main.ctrls.rp     : adds lagged controls
#   sa2.c.main.nonlinear.rp : adds the squared public-presence term
#   sa2.c.main.ihs.rp       : ihs() applied to the outcome and every regressor
sa2.c.main.rp           <- felm(I((1 - frac.related) * winsor((imports.K/RCPTOT)))         
                                ~ S.demeaned.lag.W.alt2.public.presence  
                                | YEAR + NAICS | 0 |YEAR + NAICS, data=combined.data.n4)
sa2.c.main.ctrls.rp     <- felm(I((1 - frac.related) * winsor((imports.K/RCPTOT))) 
                                ~ S.demeaned.lag.W.alt2.public.presence + 
                                    lag.ave.duty.rate.overall + post_ntr_ns + lag.VALADD.perc + lag.PAYANN.perc + lag.WAGERATE + lag.pct20 + lag.RCPTOT.growth + I(aggregate.v/1000000000)                                
                                | YEAR + NAICS | 0 |YEAR + NAICS, data=combined.data.n4)
sa2.c.main.nonlinear.rp <- felm(I((1 - frac.related) * winsor((imports.K/RCPTOT)))
                                ~ S.demeaned.lag.W.alt2.public.presence + I(S.demeaned.lag.W.alt2.public.presence * S.demeaned.lag.W.alt2.public.presence) + 
                                    lag.ave.duty.rate.overall + post_ntr_ns + lag.VALADD.perc + lag.PAYANN.perc + lag.WAGERATE + lag.pct20 + lag.RCPTOT.growth + I(aggregate.v/1000000000)     
                                | YEAR + NAICS | 0 |YEAR + NAICS, data=combined.data.n4)
sa2.c.main.ihs.rp       <- felm(ihs(I((1 - frac.related) * winsor((imports.K/RCPTOT))))
                                ~ ihs(lag.W.alt2.public.presence) +
                                    ihs(lag.ave.duty.rate.overall) + ihs(post_ntr_ns) + ihs(lag.VALADD.perc) + ihs(lag.PAYANN.perc) + ihs(lag.WAGERATE) + ihs(lag.pct20) +
                                    ihs(lag.RCPTOT.growth) + ihs(aggregate.v/1000000000)     
                                | YEAR + NAICS | 0 |YEAR + NAICS, data=combined.data.n4)
# The second dependent-variable label was misspelled "arcisnh".
stargazer(sa2.c.main.rp, sa2.c.main.ctrls.rp, sa2.c.main.nonlinear.rp, sa2.c.main.ihs.rp,          
          style="qje", type='text',#out="C:/Users/omartian/Dropbox/GO Census/Tables/JAR/Revision2/MainConsol.tex", 
          float=FALSE, keep.stat=c("n", "adj.rsq"), table.layout = "cd#-t-a", 
          dep.var.labels = c("Demeaned", 
                             "arcsinh"),
          #covariate.labels = c("$\\text{PublicPresence}_{i,t-1}$", 
          #                     "$\\text{PublicPresence}_{i,t-1}^{2}$",
          #                     "$\\text{Tariff}_{i,t-1}$",
          #                     "$\\text{NTRGap}_{i} \\times \\text{Post2001}_{t}$",
          #                     "$\\text{ValueAdd}_{i,t-1}$",
          #                     "$\\text{Payroll}_{i,t-1}$",
          #                     "$\\text{WageRate}_{i,t-1}$",
          #                     "$\\text{Concentration}_{i,t-1}$",  
          #                     "$\\text{IndustryGrowth}_{i,t-1}$",
          #                     "$\\text{ImportsOHIC}_{i,t}$"),
          add.lines=list(c("\\textit{Observation Level:}", rep("\\multicolumn{1}{c}{$i,t$}", 4)),
                         "",  
                         "\\textit{Fixed Effects:}", 
                         c("Industry ($i$)",                                   rep("\\multicolumn{1}{c}{Yes}", 4)),
                         c("Year ($t$)",                                       rep("\\multicolumn{1}{c}{Yes}", 4)),
                         "",
                         "\\textit{Number of Clusters:}",
                         c("Industry", 
                           length(unique(sa2.c.main.rp$clustervar$NAICS)),
                           length(unique(sa2.c.main.ctrls.rp$clustervar$NAICS)),
                           length(unique(sa2.c.main.nonlinear.rp$clustervar$NAICS)),
                           length(unique(sa2.c.main.ihs.rp$clustervar$NAICS))),
                         c("Year", 
                           length(unique(sa2.c.main.rp$clustervar$YEAR)),
                           length(unique(sa2.c.main.ctrls.rp$clustervar$YEAR)),
                           length(unique(sa2.c.main.nonlinear.rp$clustervar$YEAR)),
                           length(unique(sa2.c.main.ihs.rp$clustervar$YEAR))),
                         "",
                         c("$N$", sa2.c.main.rp$N,
                           sa2.c.main.ctrls.rp$N,
                           sa2.c.main.nonlinear.rp$N,
                           sa2.c.main.ihs.rp$N),
                         c("$R^2$ Full Model", round(summary(sa2.c.main.rp)$r.squared,3),
                           round(summary(sa2.c.main.ctrls.rp)$r.squared,3),
                           round(summary(sa2.c.main.nonlinear.rp)$r.squared,3),
                           round(summary(sa2.c.main.ihs.rp)$r.squared,3)),
                         c("$R^2$ Projected Model", round(summary(sa2.c.main.rp)$P.r.squared,3),
                           round(summary(sa2.c.main.ctrls.rp)$P.r.squared,3),
                           round(summary(sa2.c.main.nonlinear.rp)$P.r.squared,3),
                           round(summary(sa2.c.main.ihs.rp)$P.r.squared,3))))

####################
# Table 3, Panel B #
####################
# Decomposition of the import-competition ratio: the same regressors are used
# to explain, in turn, the standardized non-related-party imports (numerator),
# standardized RCPTOT (denominator), and standardized related-party imports.
# All models have year and industry fixed effects and standard errors
# clustered by YEAR and NAICS.
sa2.c.main.diffscale.rp <- felm(scale((1 - frac.related) * winsor(imports.K)) 
                                ~ S.demeaned.lag.W.alt2.public.presence  +
                                    lag.ave.duty.rate.overall + post_ntr_ns + lag.VALADD.perc + lag.PAYANN.perc + lag.WAGERATE + 
                                    lag.pct20 + lag.RCPTOT.growth + I(aggregate.v/1000000000)     
                                | YEAR + NAICS | 0 |YEAR + NAICS, data=combined.data.n4)

# Restricted to rows where frac.related is observed
sa2.c.main.diffscale.denom.rp <- felm(scale(RCPTOT) ~ S.demeaned.lag.W.alt2.public.presence + 
                                          lag.ave.duty.rate.overall + post_ntr_ns + lag.VALADD.perc + lag.PAYANN.perc + lag.WAGERATE + lag.pct20 + lag.RCPTOT.growth + I(aggregate.v/1000000000)     
                                      | YEAR + NAICS | 0 |YEAR + NAICS, data=combined.data.n4[!is.na(frac.related)])

# NOTE: this assignment replaces any existing object named sa2.c.main.rp
sa2.c.main.rp <- felm(scale((frac.related) * winsor(imports.K)) ~ S.demeaned.lag.W.alt2.public.presence  +
                          lag.ave.duty.rate.overall + post_ntr_ns + lag.VALADD.perc + lag.PAYANN.perc + lag.WAGERATE + lag.pct20 + lag.RCPTOT.growth + I(aggregate.v/1000000000)     
                      | YEAR + NAICS | 0 |YEAR + NAICS, data=combined.data.n4)



# Label fixes: the Imports and RPImports labels carried an unmatched "}", and
# the fixed-effect rows repeated "Yes" six times although the table has three
# model columns.
stargazer(sa2.c.main.diffscale.rp, sa2.c.main.diffscale.denom.rp, sa2.c.main.rp,           
          style="qje", #out="C:/Users/omartian/Dropbox/GO Census/Tables/JAR/Revision2/MainConsol-B.tex", 
          type='text',
          float=FALSE, keep.stat=c("n", "adj.rsq"), table.layout = "cd#-t-a", 
          dep.var.labels = c("$\\text{Imports}_{i,t}$", 
                             "$\\text{USProduction}_{i,t}$",
                             "$\\text{RPImports}_{i,t}$"), 
          covariate.labels = c("$\\text{PublicPresence}_{i,t-1}$", 
                               "$\\text{Tariff}_{i,t-1}$",
                               "$\\text{NTRGap}_{i} \\times \\text{Post2001}_{t}$",
                               "$\\text{ValueAdd}_{i,t-1}$",
                               "$\\text{Payroll}_{i,t-1}$",
                               "$\\text{WageRate}_{i,t-1}$",
                               "$\\text{Concentration}_{i,t-1}$",  
                               "$\\text{IndustryGrowth}_{i,t-1}$",
                               "$\\text{ImportsOHIC}_{i,t}$"),
          add.lines=list("\\textit{Fixed Effects:}", 
                         c("Industry ($i$)",                                   rep("\\multicolumn{1}{c}{Yes}", 3)),
                         c("Year ($t$)",                                       rep("\\multicolumn{1}{c}{Yes}", 3)),
                         "",
                         "\\textit{Number of Clusters:}",
                         c("Industry", 
                           length(unique(sa2.c.main.diffscale.rp$clustervar$NAICS)),
                           length(unique(sa2.c.main.diffscale.denom.rp$clustervar$NAICS)),
                           length(unique(sa2.c.main.rp$clustervar$NAICS))),
                         c("Year", 
                           length(unique(sa2.c.main.diffscale.rp$clustervar$YEAR)),
                           length(unique(sa2.c.main.diffscale.denom.rp$clustervar$YEAR)),
                           length(unique(sa2.c.main.rp$clustervar$YEAR))),
                         "",
                         c("$N$", sa2.c.main.diffscale.rp$N,
                           sa2.c.main.diffscale.denom.rp$N,
                           sa2.c.main.rp$N),
                         c("$R^2$ Full Model", round(summary(sa2.c.main.diffscale.rp)$r.squared,3),
                           round(summary(sa2.c.main.diffscale.denom.rp)$r.squared,3),
                           round(summary(sa2.c.main.rp)$r.squared,3)
                         ),
                         c("$R^2$ Projected Model", round(summary(sa2.c.main.diffscale.rp)$P.r.squared,3),
                           round(summary(sa2.c.main.diffscale.denom.rp)$P.r.squared,3),
                           round(summary(sa2.c.main.rp)$P.r.squared,3))))

####################
# Table 3, Panel C #
####################
# Changes specification: year-over-year change in non-related-party import
# competition on terciles of the lagged changes in public presence, plus
# changes in the controls, with year fixed effects and standard errors
# clustered by YEAR and NAICS.
#   d.c.main.ctrls.fe   : tercile number (1-3) of each lagged change, on rows
#                         where all four public-presence lags are observed
#   d.c.main.ctrls.fe.a : separate indicators for changes at or below the 33rd
#                         and at or above the 67th percentile
d.c.main.ctrls.fe  <- felm(I(((1 - frac.related) * winsor(imports.K/RCPTOT)) - ((1 - lag.frac.related) * winsor(lag.imports.K/lag.RCPTOT))) ~
                               as.integer(split_quantile(lag.W.alt2.public.presence - lag2.W.alt2.public.presence,3)) 
                           +as.integer(split_quantile(lag2.W.alt2.public.presence - lag3.W.alt2.public.presence,3)) 
                           +as.integer(split_quantile(lag3.W.alt2.public.presence - lag4.W.alt2.public.presence,3)) +
                               + scale(d.lag.ave.duty.rate.overall) + ifelse(YEAR == 2001,-S.s1999mean, 0) +
                               scale(d.lag.RCPTOT.growth) + scale(d.lag.VALADD.perc) + scale(d.lag.PAYANN.perc) + scale(d.lag.WAGERATE)
                           + scale(d.lag.pct20) + scale((aggregate.v-lag.aggregate.v)/1000000000) 
                           | YEAR         | 0 |YEAR + NAICS, data=combined.data.n4[!is.na(lag2.W.alt2.public.presence)&!is.na(lag.W.alt2.public.presence)&!is.na(lag3.W.alt2.public.presence)&!is.na(lag4.W.alt2.public.presence)])

d.c.main.ctrls.fe.a  <- felm(I(((1 - frac.related) * winsor(imports.K/RCPTOT)) - ((1 - lag.frac.related) * winsor(lag.imports.K/lag.RCPTOT))) ~
                                 ifelse(lag.W.alt2.public.presence - lag2.W.alt2.public.presence<=quantile(lag.W.alt2.public.presence - lag2.W.alt2.public.presence,.33,na.rm=TRUE), 1, 0) +
                                 ifelse(lag.W.alt2.public.presence - lag2.W.alt2.public.presence>=quantile(lag.W.alt2.public.presence - lag2.W.alt2.public.presence,.67,na.rm=TRUE), 1, 0) +
                                 ifelse(lag2.W.alt2.public.presence - lag3.W.alt2.public.presence<=quantile(lag2.W.alt2.public.presence - lag3.W.alt2.public.presence,.33,na.rm=TRUE), 1, 0) +
                                 ifelse(lag2.W.alt2.public.presence - lag3.W.alt2.public.presence>=quantile(lag2.W.alt2.public.presence - lag3.W.alt2.public.presence,.67,na.rm=TRUE), 1, 0) +
                                 ifelse(lag3.W.alt2.public.presence - lag4.W.alt2.public.presence<=quantile(lag3.W.alt2.public.presence - lag4.W.alt2.public.presence,.33,na.rm=TRUE), 1, 0) +
                                 ifelse(lag3.W.alt2.public.presence - lag4.W.alt2.public.presence>=quantile(lag3.W.alt2.public.presence - lag4.W.alt2.public.presence,.67,na.rm=TRUE), 1, 0) +
                                 + scale(d.lag.ave.duty.rate.overall) + ifelse(YEAR == 2001,-S.s1999mean, 0) +
                                 scale(d.lag.RCPTOT.growth) + scale(d.lag.VALADD.perc) + scale(d.lag.PAYANN.perc) + scale(d.lag.WAGERATE)
                             + scale(d.lag.pct20) + scale((aggregate.v-lag.aggregate.v)/1000000000) 
                             | YEAR         | 0 |YEAR + NAICS, data=combined.data.n4)

# Only the public-presence terms are displayed (keep=). Label fixes: the
# indicator labels used "\text", which R parses as a TAB followed by "ext",
# and contained a stray "$" that closed math mode mid-expression; the
# dependent-variable label carried an unmatched "}".
stargazer(d.c.main.ctrls.fe,d.c.main.ctrls.fe.a,           
          style="qje", #out="/Users/omartian/Dropbox/GO Census/Tables/JAR/Revision/MainConsol-C-2.tex", 
          type='text',
          float=FALSE, keep.stat=c("n", "adj.rsq"), table.layout = "cd#-t-a", 
          dep.var.labels = c("$\\Delta\\text{ImportComp}_{i,t}$"),
          keep="public.presence",
          covariate.labels = c("$\\text{Tercile}(\\textit{PublicPresence}_{i,t-1}-\\textit{PublicPresence}_{i,t-2}$)",
                               "$\\text{Tercile}(\\textit{PublicPresence}_{i,t-2}-\\textit{PublicPresence}_{i,t-3}$)",
                               "$\\text{Tercile}(\\textit{PublicPresence}_{i,t-3}-\\textit{PublicPresence}_{i,t-4}$)",
                               "$\\doubleI(\\text{Tercile}(\\text{PublicPresence}_{i,t-1}-\\text{PublicPresence}_{i,t-2})=1)$", 
                               "$\\doubleI(\\text{Tercile}(\\text{PublicPresence}_{i,t-1}-\\text{PublicPresence}_{i,t-2})=3)$", 
                               "$\\doubleI(\\text{Tercile}(\\text{PublicPresence}_{i,t-2}-\\text{PublicPresence}_{i,t-3})=1)$", 
                               "$\\doubleI(\\text{Tercile}(\\text{PublicPresence}_{i,t-2}-\\text{PublicPresence}_{i,t-3})=3)$", 
                               "$\\doubleI(\\text{Tercile}(\\text{PublicPresence}_{i,t-3}-\\text{PublicPresence}_{i,t-4})=1)$", 
                               "$\\doubleI(\\text{Tercile}(\\text{PublicPresence}_{i,t-3}-\\text{PublicPresence}_{i,t-4})=3)$"), 
          add.lines=list(c("\\textit{Controls (Changes)}", rep("\\multicolumn{1}{c}{Yes}", 2)),
                         "",
                         "\\textit{Fixed Effects:}", 
                         c("Year ($t$)",                                       rep("\\multicolumn{1}{c}{Yes}", 2)),
                         "",
                         "\\textit{Number of Clusters:}",
                         c("Industry", 
                           length(unique(d.c.main.ctrls.fe$clustervar$NAICS)),
                           length(unique(d.c.main.ctrls.fe.a$clustervar$NAICS))),
                         c("Year", 
                           length(unique(d.c.main.ctrls.fe$clustervar$YEAR)),
                           length(unique(d.c.main.ctrls.fe.a$clustervar$YEAR))),
                         "",
                         c("$N$", 
                           d.c.main.ctrls.fe$N,
                           d.c.main.ctrls.fe.a$N),
                         c("$R^2$ Full Model", 
                           round(summary(d.c.main.ctrls.fe)$r.squared,3),
                           round(summary(d.c.main.ctrls.fe.a)$r.squared,3)),
                         c("$R^2$ Projected Model", 
                           round(summary(d.c.main.ctrls.fe)$P.r.squared,3),
                           round(summary(d.c.main.ctrls.fe.a)$P.r.squared,3))))


###########
# Table 4 #
###########


# IV regressions of standardized import competition on lagged public presence,
# instrumented with S.bhar.ind.ew interacted with time. Industry and year
# fixed effects, standard errors clustered by NAICS and YEAR, sample years
# 2000-2008, restricted to industries listed in Sox.changes with non-missing
# import.comp.no.rp.x and S.bhar.ind.ew.
#   multiyear.flat.bhar.fit : one instrument, S.bhar.ind.ew switched on for YEAR >= 2005
#   multiyear.flex.bhar.fit : six instruments, one per year 2003-2008
multiyear.flat.bhar.fit <- felm(S.import.comp.no.rp ~ S.lag.ave.duty.rate.overall + post_ntr_ns + S.lag.VALADD.perc + S.lag.PAYANN.perc + S.lag.WAGERATE +
                                    S.lag.pct20 + S.lag.RCPTOT.growth +I(aggregate.v/1000000000) | NAICS + YEAR | 
                                    (S.demeaned.lag.W.alt2.public.presence ~ ifelse(YEAR >= 2005, S.bhar.ind.ew, 0)) | NAICS + YEAR, 
                                data=sox.event.consol[NAICS %in% Sox.changes[!is.na(import.comp.no.rp.x) & !is.na(S.bhar.ind.ew),NAICS] & YEAR >= 2000 & YEAR <= 2008])
multiyear.flex.bhar.fit <- felm(S.import.comp.no.rp ~ S.lag.ave.duty.rate.overall + post_ntr_ns + S.lag.VALADD.perc + S.lag.PAYANN.perc + S.lag.WAGERATE +
                                    S.lag.pct20 + S.lag.RCPTOT.growth +I(aggregate.v/1000000000) | NAICS + YEAR | 
                                    (S.demeaned.lag.W.alt2.public.presence ~   ifelse(YEAR == 2003, S.bhar.ind.ew, 0)+ 
                                         ifelse(YEAR == 2004, S.bhar.ind.ew, 0)+
                                         ifelse(YEAR == 2005, S.bhar.ind.ew, 0)+ 
                                         ifelse(YEAR == 2006, S.bhar.ind.ew, 0)+ 
                                         ifelse(YEAR == 2007, S.bhar.ind.ew, 0)+ 
                                         ifelse(YEAR == 2008, S.bhar.ind.ew, 0)) | NAICS + YEAR, 
                                data=sox.event.consol[NAICS %in% Sox.changes[!is.na(import.comp.no.rp.x) & !is.na(S.bhar.ind.ew),NAICS] & YEAR >= 2000 & YEAR <= 2008])


# First-stage table. order= moves the instruments (positions 9:15) ahead of the
# eight controls. The $N$ row previously reported the flat model's N in both
# columns; the second entry now comes from the flex model.
stargazer(multiyear.flat.bhar.fit$stage1, multiyear.flex.bhar.fit$stage1, order=c(9:15,1:8), 
          style="qje", out="C:/Users/omartian/Downloads/iv_stage1_new.tex", type='text',
          float=FALSE, keep.stat=c("n", "adj.rsq"), table.layout = "c#-t-a", 
          column.labels = c("$\\textit{PublicPresence}_{i,t-1}$"),
          covariate.labels = c("$\\textit{SOXBHAR}_{i} \\times \\textit{YearGE2005}_{t}$",
                               "$\\textit{SOXBHAR}_{i} \\times \\textit{Year2003}_{t}$",
                               "$\\textit{SOXBHAR}_{i} \\times \\textit{Year2004}_{t}$",
                               "$\\textit{SOXBHAR}_{i} \\times \\textit{Year2005}_{t}$",
                               "$\\textit{SOXBHAR}_{i} \\times \\textit{Year2006}_{t}$",
                               "$\\textit{SOXBHAR}_{i} \\times \\textit{Year2007}_{t}$",
                               "$\\textit{SOXBHAR}_{i} \\times \\textit{Year2008}_{t}$",
                               "$\\text{Tariff}_{i,t-1}$",
                               "$\\text{NTRGap}_{i} \\times \\text{Post2001}_{t}$",
                               "$\\text{ValueAdd}_{i,t-1}$",
                               "$\\text{Payroll}_{i,t-1}$",
                               "$\\text{WageRate}_{i,t-1}$",
                               "$\\text{Concentration}_{i,t-1}$",  
                               "$\\text{IndustryGrowth}_{i,t-1}$",
                               "$\\text{ImportsOHIC}_{i,t}$"),
          add.lines=list("\\textit{Fixed Effects:}", 
                         c("Industry ($i$)",                                   rep("\\multicolumn{1}{c}{Yes}", 2)),
                         c("Year ($t$)",                                       rep("\\multicolumn{1}{c}{Yes}", 2)),
                         c("$N$", multiyear.flat.bhar.fit$stage1$N,
                           multiyear.flex.bhar.fit$stage1$N),
                         c("$R^2$ Full Model", round(summary(multiyear.flat.bhar.fit$stage1)$r.squared,3),
                           round(summary(multiyear.flex.bhar.fit$stage1)$r.squared,3)),
                         c("$R^2$ Projected Model", round(summary(multiyear.flat.bhar.fit$stage1)$P.r.squared,3),
                           round(summary(multiyear.flex.bhar.fit$stage1)$P.r.squared,3))))
# Second-stage table, with the instrumented regressor (position 9) listed first
stargazer(multiyear.flat.bhar.fit, multiyear.flex.bhar.fit, order=c(9,1:8), type='text')

########################
# SOX Eventtime Graphs #
########################

# Event-time version of the first stage: standardized public presence is
# regressed on seven controls plus one bhar.ind.ew loading per year in
# 1999-2008, with YEAR and NAICS fixed effects. 2002 is left out (its term is
# commented out below) and therefore serves as the reference year.
sox.stage1.graph.fit <- felm(scale(W.alt2.public.presence)~ ave.duty.rate.overall + post_ntr_ns
                             + VALADD.perc + PAYANN.perc + WAGERATE + pct20 + RCPTOT.growth + 
                                 ifelse(YEAR == 1999, bhar.ind.ew, 0) +
                                 ifelse(YEAR == 2000, bhar.ind.ew, 0) + 
                                 ifelse(YEAR == 2001, bhar.ind.ew, 0) +
                                 #    ifelse(YEAR == 2002, bhar.ind.ew, 0) +
                                 ifelse(YEAR == 2003, bhar.ind.ew, 0) + 
                                 ifelse(YEAR == 2004, bhar.ind.ew, 0) + 
                                 ifelse(YEAR == 2005, bhar.ind.ew, 0) +
                                 ifelse(YEAR == 2006, bhar.ind.ew, 0) +
                                 ifelse(YEAR == 2007, bhar.ind.ew, 0) +
                                 ifelse(YEAR == 2008, bhar.ind.ew, 0) 
                             | YEAR + NAICS, 
                             data=sox.event.consol[!is.na(rank.ew)&YEAR <= 2008 & YEAR >=1999 & NAICS %in% Sox.changes[!is.na(import.comp.no.rp.x) & !is.na(S.bhar.ind.ew),NAICS]])
# Coefficients 1-7 are the controls; positions 8-16 are the nine year-specific
# loadings, in the same order as `period`. The positional index must be kept in
# sync with the formula above if a regressor is added or removed.
sox.stage1.graph.data <- data.table(period=c(1999,2000,2001,2003,2004,2005,2006,2007,2008), 
                                    coef=sox.stage1.graph.fit$beta[8:16], 
                                    data.table(confint(sox.stage1.graph.fit, level=.90))[8:16])
setnames(sox.stage1.graph.data, c("5 %", "95 %"), c("ci.low", "ci.high"))
# create a point for the reference period
sox.stage1.graph.data <- rbind(sox.stage1.graph.data, data.table(period=2002,coef=0,ci.low=0,ci.high=0), use.names=FALSE)
setorder(sox.stage1.graph.data,period)

saveRDS(sox.stage1.graph.data, "C:/Users/omartian/sox_stage1_graph_data.RDS")

tikz(file = "C:/Users/omartian/Dropbox/GO Census/Tables/JAR/Revision2/sox_stage1_graph.tex", width = 6.5, height = 3.5)
# print() explicitly: a ggplot object is only drawn when printed, and top-level
# autoprint does not happen when this file is run through source(), which would
# leave the tikz file without a figure.
print(
    ggplot(data=sox.stage1.graph.data) + 
        geom_pointrange(aes(x=period, y=coef, ymin=ci.low, ymax=ci.high), fatten=2) + 
        #annotate("rect", ymin = -.12, ymax = .3, xmin = 2002.5, xmax = 2008.5, alpha=.2) + 
        theme_minimal() + 
        #coord_cartesian(ylim = c(-.22, .7),xlim=c(2002.5,2012.5), expand=FALSE) + 
        scale_x_continuous("Year", breaks = seq(1999,2008,1)) +
        scale_y_continuous("Coefficient") +
        theme(panel.grid.minor = element_blank(),
              panel.grid.major.x = element_blank(),
              panel.grid.major.y = element_line(size=.1, color="gray"))
)
dev.off()

# Event-time regression of S.import.comp.no.rp on seven lagged controls plus
# one bhar.ind.ew loading per year in 2000-2008, with YEAR and NAICS fixed
# effects. The 2002 term is commented out, so 2002 is the reference year.
# NOTE(review): unlike the stage-1 graph regression, the sample here has no
# lower YEAR bound and no NAICS filter, so any pre-2000 rows in
# sox.event.consol join 2002 in the omitted baseline -- confirm this is intended.
sox.stage2.graph.fit <- felm(S.import.comp.no.rp  ~ lag.ave.duty.rate.overall + post_ntr_ns
                             + lag.VALADD.perc + lag.PAYANN.perc + lag.WAGERATE + lag.pct20 + lag.RCPTOT.growth + 
                                 ifelse(YEAR == 2000, bhar.ind.ew, 0) + 
                                 ifelse(YEAR == 2001, bhar.ind.ew, 0) +
                                 # ifelse(YEAR == 2002, S.bhar.ind.ew, 0) +
                                 ifelse(YEAR == 2003, bhar.ind.ew, 0) + 
                                 ifelse(YEAR == 2004, bhar.ind.ew, 0) + 
                                 ifelse(YEAR == 2005, bhar.ind.ew, 0) +
                                 ifelse(YEAR == 2006, bhar.ind.ew, 0) +
                                 ifelse(YEAR == 2007, bhar.ind.ew, 0) +
                                 ifelse(YEAR == 2008, bhar.ind.ew, 0) | YEAR + NAICS, 
                             data=sox.event.consol[!is.na(rank.ew)&YEAR <= 2008])
# Coefficients 1-7 are the controls; positions 8-15 are the eight year-specific
# loadings, in the same order as `period`. Keep the index in sync with the
# formula above.
sox.stage2.graph.data <- data.table(period=c(2000,2001,2003,2004,2005,2006,2007,2008), 
                                    coef=sox.stage2.graph.fit$beta[8:15], 
                                    data.table(confint(sox.stage2.graph.fit, level=.90))[8:15])
setnames(sox.stage2.graph.data, c("5 %", "95 %"), c("ci.low", "ci.high"))
# create a point for the reference period
sox.stage2.graph.data <- rbind(sox.stage2.graph.data, data.table(period=2002,coef=0,ci.low=0,ci.high=0), use.names=FALSE)
setorder(sox.stage2.graph.data,period)

saveRDS(sox.stage2.graph.data, "C:/Users/omartian/sox_stage2_graph_data.RDS")

tikz(file = "C:/Users/omartian/Dropbox/GO Census/Tables/JAR/Revision2/sox_stage2_graph.tex", width = 6.5, height = 3.5)
# print() explicitly: a ggplot object is only drawn when printed, and top-level
# autoprint does not happen when this file is run through source(), which would
# leave the tikz file without a figure.
print(
    ggplot(data=sox.stage2.graph.data) + 
        geom_pointrange(aes(x=period, y=coef, ymin=ci.low, ymax=ci.high), fatten=2) + 
        #annotate("rect", ymin = -.12, ymax = .3, xmin = 2002.5, xmax = 2008.5, alpha=.2) + 
        theme_minimal() + 
        #coord_cartesian(ylim = c(-.22, .7),xlim=c(2002.5,2012.5), expand=FALSE) + 
        scale_x_continuous("Year", breaks = seq(2000,2008,1)) +
        scale_y_continuous("Coefficient") +
        theme(panel.grid.minor = element_blank(),
              panel.grid.major.x = element_blank(),
              panel.grid.major.y = element_line(size=.1, color="gray"))
)
dev.off()



######################
# German Regressions #
######################


# German import-competition regressions on German.data5, years 2003-2012.
# Every model has naics and year fixed effects, no IV part (the `0` slot), and
# standard errors clustered on naics and year. The treatment term switches
# W.disc.treat on from 2008 onwards.

# (1) Treatment term only.
de.regress.1 <- felm(
    W.import.competition ~
        ifelse(year>=2008, W.disc.treat,0)
    | naics + year | 0 | naics + year,
    data=German.data5[year <= 2012  & year >= 2003 ])

# (2) Adds standardized US import competition.
de.regress.2 <- felm(
    W.import.competition ~
        ifelse(year>=2008, W.disc.treat,0) +
        scale(demeaned.US.import.comp)
    | naics + year | 0 | naics + year,
    data=German.data5[year <= 2012  & year >= 2003 ])

# (2a) As (2), with US import competition also allowed a separate post-2008 slope.
de.regress.2a <- felm(
    W.import.competition ~
        ifelse(year>=2008, W.disc.treat,0) +
        scale(demeaned.US.import.comp) +
        ifelse(year>=2008,scale(demeaned.US.import.comp),0)
    | naics + year | 0 | naics + year,
    data=German.data5[year <= 2012  & year >= 2003 ])

# (3) As (2), plus the lagged public-sales-fraction control.
de.regress.3 <- felm(
    W.import.competition ~
        ifelse(year>=2008, W.disc.treat,0) +
        scale(demeaned.US.import.comp) +
        S.demeaned.lag.W.public.sales.frac
    | naics + year | 0 | naics + year,
    data=German.data5[year <= 2012  & year >= 2003 ])

# (3a) As (3), with post-2008 slopes on both controls.
de.regress.3a <- felm(
    W.import.competition ~
        ifelse(year>=2008, W.disc.treat,0) +
        scale(demeaned.US.import.comp) +
        ifelse(year>=2008,scale(demeaned.US.import.comp),0) +
        S.demeaned.lag.W.public.sales.frac +
        ifelse(year >=2008, S.demeaned.lag.W.public.sales.frac,0)
    | naics + year | 0 | naics + year,
    data=German.data5[year <= 2012  & year >= 2003 ])

# Console table for the five German regressions. The footer rows (fixed-effect
# indicators, cluster counts, N and both R^2 variants) are assembled by mapping
# over the list of fits; local() keeps the helpers out of the global workspace.
stargazer(de.regress.1, de.regress.2, de.regress.2a, de.regress.3, de.regress.3a,
          style="qje", #out="/Users/omartian/Dropbox/GO Census/Tables/JAR/Revision/GermanRegressions.tex", 
          type='text',
          float=FALSE, keep.stat=c("n", "adj.rsq"), table.layout = "cd#-t-a",
          dep.var.labels = c("$\\text{GermanImportComp}_{i,t}$"),
       #   covariate.labels = c("$\\textit{Post2007}_{t} \\times \\textit{DiscShift}_{i}$", 
    #                           "$\\text{ImportComp}_{i,t}$",
     #                          "$\\textit{GermanPublicPresence}_{i,t-1}$"),
          add.lines=local({
              de.fits   <- list(de.regress.1, de.regress.2, de.regress.2a, de.regress.3, de.regress.3a)
              yes.cells <- rep("\\multicolumn{1}{c}{Yes}", 5)
              # Apply `f` to every fit and flatten to one vector (one cell per column).
              per.fit   <- function(f) unlist(lapply(de.fits, f))
              list("\\textit{Fixed Effects:}",
                   c("Industry ($i$)", yes.cells),
                   c("Year ($t$)",     yes.cells),
                   "",
                   "\\textit{Number of Clusters:}",
                   c("Industry", per.fit(function(fit) length(unique(fit$clustervar$naics)))),
                   c("Year",     per.fit(function(fit) length(unique(fit$clustervar$year)))),
                   "",
                   c("$N$",                   per.fit(function(fit) fit$N)),
                   c("$R^2$ Full Model",      per.fit(function(fit) round(summary(fit)$r.squared,3))),
                   c("$R^2$ Projected Model", per.fit(function(fit) round(summary(fit)$P.r.squared,3))))
          }))


# Event-time version of the German regression: one W.disc.treat loading per
# year in 2003-2012 except 2007 (the omitted reference year), plus standardized
# US import competition; naics and year fixed effects, clustered on both.
de.regress     <- felm(W.import.competition ~ 
                           ifelse(year==2003, W.disc.treat,0) + 
                           ifelse(year==2004, W.disc.treat,0) + 
                           ifelse(year==2005, W.disc.treat,0) + 
                           ifelse(year==2006, W.disc.treat,0) + 
                           ifelse(year==2008, W.disc.treat,0) + 
                           ifelse(year==2009, W.disc.treat,0) + 
                           ifelse(year==2010, W.disc.treat,0) + 
                           ifelse(year==2011, W.disc.treat,0) + 
                           ifelse(year==2012, W.disc.treat,0) + scale(demeaned.US.import.comp)
                       | naics + year | 0 |naics + year,         
                       data=German.data5[year <= 2012  & year >= 2003 ])



# One row per coefficient. The tenth coefficient is the US import-competition
# control, not a year effect; it gets the sentinel period 9999 and is filtered
# out before plotting. The whole $beta object is passed in, and the resulting
# coefficient column is the one referenced below as coef.W.import.competition.
de.eventtime.data <- data.table(period=c(2003, 2004, 2005, 2006, 2008, 2009, 2010, 2011, 2012, 9999), 
                                coef=de.regress$beta, data.table(confint(de.regress, level=.90)))
setnames(de.eventtime.data, c("5 %", "95 %"), c("ci.low", "ci.high"))
# create a point for the reference period
# (use.names=FALSE binds by position, so the differing coefficient column name is fine)
de.eventtime.data <- rbind(de.eventtime.data, data.table(period=2007,coef=0,ci.low=0,ci.high=0), use.names=FALSE)
setorder(de.eventtime.data,period)
de.eventtime.data



tikz(file = "C:/Users/omartian/Box/GO/ManufacturingDecline/Figures/de_eventtime.tex", width = 6.5, height = 3.5)
# print() explicitly: a ggplot object is only drawn when printed, and top-level
# autoprint does not happen when this file is run through source(), which would
# leave the tikz file without a figure.
print(
    ggplot(data=de.eventtime.data[period < 3000]) + 
        geom_pointrange(aes(x=period, y=coef.W.import.competition, ymin=ci.low, ymax=ci.high), fatten=2) + 
        # shaded band marks the post-2007 years
        annotate("rect", ymin = -.25, ymax = .7, xmin = 2007.5, xmax = 2012.5, alpha=.2) + 
        theme_minimal() + 
        coord_cartesian(ylim = c(-.22, .7),xlim=c(2002.5,2012.5), expand=FALSE) + 
        scale_x_continuous("Year", breaks = seq(2003,2012,1)) +
        scale_y_continuous("Coefficient") +
        theme(panel.grid.minor = element_blank(),
              panel.grid.major.x = element_blank(),
              panel.grid.major.y = element_line(size=.1, color="gray"))
)
dev.off()


# Line chart of the yearly median of W.disclosure in German.data1 for
# 2003-2012, written to de_treatment.tex.
tikz(file = "C:/Users/omartian/Box/GO/ManufacturingDecline/Figures/de_treatment.tex", width = 6.5, height = 3.5)
# print() explicitly: a ggplot object is only drawn when printed, and top-level
# autoprint does not happen when this file is run through source(), which would
# leave the tikz file without a figure.
print(
    ggplot(data=German.data1[year>=2003 & year <= 2012,.(W.disclosure=quantile(W.disclosure,.5,na.rm=TRUE)),by=year]) + 
        geom_line(aes(y=W.disclosure,x=year)) + geom_point(aes(y=W.disclosure,x=year)) + theme_minimal() + 
        theme(panel.grid.minor = element_blank(),
              panel.grid.major.x = element_blank(),
              panel.grid.major.y = element_line(size=.1, color="gray")) +
        scale_x_continuous("Year", breaks = seq(2003,2012,1)) +
        scale_y_continuous("Disclosure Ratio") 
)
dev.off()

###########
# Table 6 #
###########

# Table 6 models on comb.data.eq. Each regresses import competition net of the
# related-party fraction on one rsquare_* measure, lagged public presence, and
# their interaction, with a common control set. All four use YEAR and NAICS
# fixed effects and cluster on YEAR and NAICS; only the rsquare_* measure
# differs between them.

# rsquare_volfull
eq.rsqvf.c <- felm(
    I((1 - frac.related) * winsor((imports.K/RCPTOT))) ~
        rsquare_volfull +
        S.demeaned.lag.W.alt2.public.presence +
        S.demeaned.lag.W.alt2.public.presence:rsquare_volfull +
        lag.ave.duty.rate.overall + post_ntr_ns + lag.VALADD.perc + lag.PAYANN.perc +
        lag.WAGERATE + lag.pct20 + lag.RCPTOT.growth + I(aggregate.v/1000000000)
    | YEAR + NAICS | 0 | YEAR + NAICS,
    data=comb.data.eq)

# rsquare_volrdq
eq.rsqvea.c <- felm(
    I((1 - frac.related) * winsor((imports.K/RCPTOT))) ~
        rsquare_volrdq +
        S.demeaned.lag.W.alt2.public.presence +
        S.demeaned.lag.W.alt2.public.presence:rsquare_volrdq +
        lag.ave.duty.rate.overall + post_ntr_ns + lag.VALADD.perc + lag.PAYANN.perc +
        lag.WAGERATE + lag.pct20 + lag.RCPTOT.growth + I(aggregate.v/1000000000)
    | YEAR + NAICS | 0 | YEAR + NAICS,
    data=comb.data.eq)

# rsquare_absretfull
eq.rsqarf.c <- felm(
    I((1 - frac.related) * winsor((imports.K/RCPTOT))) ~
        rsquare_absretfull +
        S.demeaned.lag.W.alt2.public.presence +
        S.demeaned.lag.W.alt2.public.presence:rsquare_absretfull +
        lag.ave.duty.rate.overall + post_ntr_ns + lag.VALADD.perc + lag.PAYANN.perc +
        lag.WAGERATE + lag.pct20 + lag.RCPTOT.growth + I(aggregate.v/1000000000)
    | YEAR + NAICS | 0 | YEAR + NAICS,
    data=comb.data.eq)

# rsquare_absretrdq
eq.rsqvara.c <- felm(
    I((1 - frac.related) * winsor((imports.K/RCPTOT))) ~
        rsquare_absretrdq +
        S.demeaned.lag.W.alt2.public.presence +
        S.demeaned.lag.W.alt2.public.presence:rsquare_absretrdq +
        lag.ave.duty.rate.overall + post_ntr_ns + lag.VALADD.perc + lag.PAYANN.perc +
        lag.WAGERATE + lag.pct20 + lag.RCPTOT.growth + I(aggregate.v/1000000000)
    | YEAR + NAICS | 0 | YEAR + NAICS,
    data=comb.data.eq)

# Table 6 output for the four rsquare_* models.
# keep= retains public presence, the four rsquare_* main effects and their
# interactions, and drops the remaining controls. The previous pattern
# ("public.presence|scale") matched no rsquare_* main effect, so the ICScore
# labels were being attached to the interaction rows.
# order= pins the row order to the nine covariate.labels below: public
# presence, the four main effects, then the four interactions. Interaction
# patterns accept either ordering of the two variable names around ":".
stargazer(eq.rsqvf.c, eq.rsqvea.c, eq.rsqarf.c, eq.rsqvara.c, 
          type="text", style="qje", #out="/Users/omartian/Dropbox/GO Census/Tables/JAR/TableEQ.tex", 
          float=FALSE, 
          keep.stat=c("n", "adj.rsq"), table.layout = "cd#-t-a",
          dep.var.labels = c("$\\text{ImportComp}_{i,t}$"),
          keep="public\\.presence|rsquare_",
          order=c("^S\\.demeaned\\.lag\\.W\\.alt2\\.public\\.presence$",
                  "^rsquare_volfull$",
                  "^rsquare_volrdq$",
                  "^rsquare_absretfull$",
                  "^rsquare_absretrdq$",
                  "rsquare_volfull:|:rsquare_volfull",
                  "rsquare_volrdq:|:rsquare_volrdq",
                  "rsquare_absretfull:|:rsquare_absretfull",
                  "rsquare_absretrdq:|:rsquare_absretrdq"),
          covariate.labels = c("$\\text{PublicPresence}_{i,t-1}$", 
                               "$\\text{ICScore1}_{i,t-1}$",
                               "$\\text{ICScore2}_{i,t-1}$",
                               "$\\text{ICScore3}_{i,t-1}$",
                               "$\\text{ICScore4}_{i,t-1}$", 
                               "$\\text{PublicPresence}_{i,t-1} \\times \\text{ICScore1}_{i,t-1}$",
                               "$\\text{PublicPresence}_{i,t-1} \\times \\text{ICScore2}_{i,t-1}$",
                               "$\\text{PublicPresence}_{i,t-1} \\times \\text{ICScore3}_{i,t-1}$",
                               "$\\text{PublicPresence}_{i,t-1} \\times \\text{ICScore4}_{i,t-1}$"),
          # Footer rows carry one cell per model (four). The first two rows
          # previously had eight cells, four more than the table has columns.
          # NOTE(review): the four retained cells are the original leading
          # "i,t" entries -- confirm that is the right observation level for
          # comb.data.eq.
          add.lines=list(c("\\textit{Observation Level:}", rep("\\multicolumn{1}{c}{i,t}", 4)),
                         "", c("\\textit{Additional Controls:}", rep("\\multicolumn{1}{c}{Yes}", 4)),
                         "", "\\textit{Fixed Effects:}", 
                         c("$\\text{Industry} (i)$",
                           rep("\\multicolumn{1}{c}{Yes}", 4)),
                         c("$\\text{Year} (t)$",
                           rep("\\multicolumn{1}{c}{Yes}", 4)),
                         "",
                         "\\textit{Number of Clusters:}",
                         c("Industry", 
                           length(unique(eq.rsqvf.c$clustervar$NAICS)), 
                           length(unique(eq.rsqvea.c$clustervar$NAICS)), 
                           length(unique(eq.rsqarf.c$clustervar$NAICS)), 
                           length(unique(eq.rsqvara.c$clustervar$NAICS))),
                         c("Year",
                           length(unique(eq.rsqvf.c$clustervar$YEAR)), 
                           length(unique(eq.rsqvea.c$clustervar$YEAR)), 
                           length(unique(eq.rsqarf.c$clustervar$YEAR)), 
                           length(unique(eq.rsqvara.c$clustervar$YEAR))),
                         "",
                         c("$N$", eq.rsqvf.c$N,
                           eq.rsqvea.c$N,
                           eq.rsqarf.c$N,
                           eq.rsqvara.c$N),
                         c("$R^2$ Full Model", round(summary(eq.rsqvf.c)$r.squared,3),
                           round(summary(eq.rsqvea.c)$r.squared,3),
                           round(summary(eq.rsqarf.c)$r.squared,3),
                           round(summary(eq.rsqvara.c)$r.squared,3)),
                         c("$R^2$ Projected Model", round(summary(eq.rsqvf.c)$P.r.squared,3),
                           round(summary(eq.rsqvea.c)$P.r.squared,3),
                           round(summary(eq.rsqarf.c)$P.r.squared,3),
                           round(summary(eq.rsqvara.c)$P.r.squared,3))))

####################
# Table 7, Panel A #
####################
# Console table of the four "guided" models (earn, sales, gm, cpx), fitted
# elsewhere in the file; dependent-variable labels are suppressed.
stargazer(sa2.c.guided.earn.ctrls.rp,
          sa2.c.guided.sales.ctrls.rp,
          sa2.c.guided.gm.ctrls.rp,
          sa2.c.guided.cpx.ctrls.rp,
          type = "text",
          dep.var.labels.include = FALSE)



####################
# Table 7, Panel B #
####################
# Table 7, Panel B models on combined.data.n4.followed. Each regresses import
# competition net of the related-party fraction on lagged public presence, a
# sales-scaled one-year analyst measure, and that measure multiplied by public
# presence, plus the common controls. YEAR and NAICS fixed effects; clustered
# on YEAR and NAICS. Only the analyst measure (EPS1/SAL1/CPX1/GRM1) differs.

# EPS1
sa2.c.followed1yr.epssale.ctrls.rp <- felm(
    I((1 - frac.related) * winsor((imports.K/RCPTOT))) ~
        lag.W.alt2.public.presence +
        winsor(lag.ANALYSTS.EPS1*1000000/lag.BSEG.SALE.y) +
        winsor(lag.ANALYSTS.EPS1*lag.alt2.public.presence*1000000/lag.BSEG.SALE.y) +
        lag.ave.duty.rate.overall + post_ntr_ns + lag.VALADD.perc + lag.PAYANN.perc +
        lag.WAGERATE + lag.pct20 + lag.RCPTOT.growth + I(aggregate.v/1000000000)
    | YEAR + NAICS | 0 | YEAR + NAICS,
    data=combined.data.n4.followed)

# SAL1
sa2.c.followed1yr.salsale.ctrls.rp <- felm(
    I((1 - frac.related) * winsor((imports.K/RCPTOT))) ~
        lag.W.alt2.public.presence +
        winsor(lag.ANALYSTS.SAL1*1000000/lag.BSEG.SALE.y) +
        winsor(lag.ANALYSTS.SAL1*lag.alt2.public.presence*1000000/lag.BSEG.SALE.y) +
        lag.ave.duty.rate.overall + post_ntr_ns + lag.VALADD.perc + lag.PAYANN.perc +
        lag.WAGERATE + lag.pct20 + lag.RCPTOT.growth + I(aggregate.v/1000000000)
    | YEAR + NAICS | 0 | YEAR + NAICS,
    data=combined.data.n4.followed)

# CPX1
sa2.c.followed1yr.cpxsale.ctrls.rp <- felm(
    I((1 - frac.related) * winsor((imports.K/RCPTOT))) ~
        lag.W.alt2.public.presence +
        winsor(lag.ANALYSTS.CPX1*1000000/lag.BSEG.SALE.y) +
        winsor(lag.ANALYSTS.CPX1*lag.alt2.public.presence*1000000/lag.BSEG.SALE.y) +
        lag.ave.duty.rate.overall + post_ntr_ns + lag.VALADD.perc + lag.PAYANN.perc +
        lag.WAGERATE + lag.pct20 + lag.RCPTOT.growth + I(aggregate.v/1000000000)
    | YEAR + NAICS | 0 | YEAR + NAICS,
    data=combined.data.n4.followed)

# GRM1
sa2.c.followed1yr.grmsale.ctrls.rp <- felm(
    I((1 - frac.related) * winsor((imports.K/RCPTOT))) ~
        lag.W.alt2.public.presence +
        winsor(lag.ANALYSTS.GRM1*1000000/lag.BSEG.SALE.y) +
        winsor(lag.ANALYSTS.GRM1*lag.alt2.public.presence*1000000/lag.BSEG.SALE.y) +
        lag.ave.duty.rate.overall + post_ntr_ns + lag.VALADD.perc + lag.PAYANN.perc +
        lag.WAGERATE + lag.pct20 + lag.RCPTOT.growth + I(aggregate.v/1000000000)
    | YEAR + NAICS | 0 | YEAR + NAICS,
    data=combined.data.n4.followed)

# Columns are printed in the order EPS, SAL, GRM, CPX.
stargazer(sa2.c.followed1yr.epssale.ctrls.rp,
          sa2.c.followed1yr.salsale.ctrls.rp,
          sa2.c.followed1yr.grmsale.ctrls.rp,
          sa2.c.followed1yr.cpxsale.ctrls.rp,
          type = "text")


####################
# Table 7, Panel C #
####################

# Table 7, Panel C models on combined.data.n4.followed: the same specification
# as the one-year EPS model, using the EPS2 through EPS5 analyst measures in
# turn. YEAR and NAICS fixed effects; clustered on YEAR and NAICS.

# EPS2
sa2.c.followed2yr.epssale.ctrls.rp <- felm(
    I((1 - frac.related) * winsor((imports.K/RCPTOT))) ~
        lag.W.alt2.public.presence +
        winsor(lag.ANALYSTS.EPS2*1000000/lag.BSEG.SALE.y) +
        winsor(lag.ANALYSTS.EPS2*1000000*lag.alt2.public.presence/lag.BSEG.SALE.y) +
        lag.ave.duty.rate.overall + post_ntr_ns + lag.VALADD.perc + lag.PAYANN.perc +
        lag.WAGERATE + lag.pct20 + lag.RCPTOT.growth + I(aggregate.v/1000000000)
    | YEAR + NAICS | 0 | YEAR + NAICS,
    data=combined.data.n4.followed)

# EPS3
sa2.c.followed3yr.epssale.ctrls.rp <- felm(
    I((1 - frac.related) * winsor((imports.K/RCPTOT))) ~
        lag.W.alt2.public.presence +
        winsor(lag.ANALYSTS.EPS3*1000000/lag.BSEG.SALE.y) +
        winsor(lag.ANALYSTS.EPS3*1000000*lag.alt2.public.presence/lag.BSEG.SALE.y) +
        lag.ave.duty.rate.overall + post_ntr_ns + lag.VALADD.perc + lag.PAYANN.perc +
        lag.WAGERATE + lag.pct20 + lag.RCPTOT.growth + I(aggregate.v/1000000000)
    | YEAR + NAICS | 0 | YEAR + NAICS,
    data=combined.data.n4.followed)

# EPS4
sa2.c.followed4yr.epssale.ctrls.rp <- felm(
    I((1 - frac.related) * winsor((imports.K/RCPTOT))) ~
        lag.W.alt2.public.presence +
        winsor(lag.ANALYSTS.EPS4*1000000/lag.BSEG.SALE.y) +
        winsor(lag.ANALYSTS.EPS4*1000000*lag.alt2.public.presence/lag.BSEG.SALE.y) +
        lag.ave.duty.rate.overall + post_ntr_ns + lag.VALADD.perc + lag.PAYANN.perc +
        lag.WAGERATE + lag.pct20 + lag.RCPTOT.growth + I(aggregate.v/1000000000)
    | YEAR + NAICS | 0 | YEAR + NAICS,
    data=combined.data.n4.followed)

# EPS5
sa2.c.followed5yr.epssale.ctrls.rp <- felm(
    I((1 - frac.related) * winsor((imports.K/RCPTOT))) ~
        lag.W.alt2.public.presence +
        winsor(lag.ANALYSTS.EPS5*1000000/lag.BSEG.SALE.y) +
        winsor(lag.ANALYSTS.EPS5*1000000*lag.alt2.public.presence/lag.BSEG.SALE.y) +
        lag.ave.duty.rate.overall + post_ntr_ns + lag.VALADD.perc + lag.PAYANN.perc +
        lag.WAGERATE + lag.pct20 + lag.RCPTOT.growth + I(aggregate.v/1000000000)
    | YEAR + NAICS | 0 | YEAR + NAICS,
    data=combined.data.n4.followed)

# The one-year EPS model from Panel B leads the table, followed by EPS2-EPS5.
stargazer(sa2.c.followed1yr.epssale.ctrls.rp,
          sa2.c.followed2yr.epssale.ctrls.rp,
          sa2.c.followed3yr.epssale.ctrls.rp,
          sa2.c.followed4yr.epssale.ctrls.rp,
          sa2.c.followed5yr.epssale.ctrls.rp,
          type = "text")




# UK regressions for years 2001-2017 (year > 2000 & year < 2018), all with
# naics and year fixed effects and clustering on naics and year.
# The ".pc" models use UK.data3 and UK regressors only; the ".fc" models use
# UK.data4 and add the US import-competition and US public-presence terms.
# The ".ihs" variants wrap the variables in ihs() instead of using the
# S.demeaned.* versions.

uk.regress.pc <- felm(
    S.demeaned.W.import.competition ~
        S.demeaned.lag.W.public.sales.frac +
        S.demeaned.lag.W.disclosure
    | naics + year | 0 | naics + year,
    data=UK.data3[year < 2018  & year > 2000])

uk.regress.fc <- felm(
    S.demeaned.W.import.competition ~
        S.demeaned.lag.W.public.sales.frac +
        S.demeaned.lag.W.disclosure +
        scale(demeaned.US.import.comp) +
        S.demeaned.lag.W.alt2.public.presence
    | naics + year | 0 | naics + year,
    data=UK.data4[year < 2018  & year > 2000])

uk.regress.ihs.pc <- felm(
    ihs(W.import.competition) ~
        ihs(lag.W.public.sales.frac) +
        ihs(lag.W.disclosure)
    | naics + year | 0 | naics + year,
    data=UK.data3[year < 2018  & year > 2000])

uk.regress.ihs.fc <- felm(
    ihs(W.import.competition) ~
        ihs(lag.W.public.sales.frac) +
        ihs(lag.W.disclosure) +
        ihs((1-frac.related)*winsor(imports.K/RCPTOT)) +
        ihs(lag.W.alt2.public.presence)
    | naics + year | 0 | naics + year,
    data=UK.data4[year < 2018  & year > 2000])

###########
# Table 8 #
###########

# Table 8 output for the four UK models. The first two columns share one
# dependent variable and the last two the ihs() version, hence two
# dep.var.labels. Footer rows are built by mapping over the list of fits;
# local() keeps the helpers out of the global workspace.
stargazer(uk.regress.pc, uk.regress.fc, uk.regress.ihs.pc, uk.regress.ihs.fc,
          type='text', table.layout = "cd#-t-a", #out="/Users/omartian/Dropbox/GO Census/Tables/JAR/Revision/TableUK.tex", 
          float=FALSE,
          dep.var.labels = c("$\\text{UKImportComp}_{i,t}$",
                             "$\\arcsinh(\\text{UKImportComp}_{i,t})$"),
          covariate.labels = c("$\\text{UKPublicPresence}_{i,t-1}$",
                               "$\\text{UKDisclosure}_{i,t-1}$",
                               "$\\text{USImportComp}_{i,t}$",
                               "$\\text{USPublicPresence}_{i,t-1}$",
                               "$\\arcsinh(\\text{UKPublicPresence}_{i,t-1})$",
                               "$\\arcsinh(\\text{UKDisclosure}_{i,t-1})$",
                               "$\\arcsinh(\\text{USImportComp}_{i,t})$",
                               "$\\arcsinh(\\text{USPublicPresence}_{i,t-1})$"),
          add.lines=local({
              uk.fits   <- list(uk.regress.pc, uk.regress.fc, uk.regress.ihs.pc, uk.regress.ihs.fc)
              yes.cells <- rep("\\multicolumn{1}{c}{Yes}", 4)
              # Apply `f` to every fit and flatten to one vector (one cell per column).
              per.fit   <- function(f) unlist(lapply(uk.fits, f))
              list(c("\\textit{Observation Level:}", rep("\\multicolumn{1}{c}{i,t}", 4)),
                   "", "\\textit{Fixed Effects:}",
                   c("$\\text{Industry} (i)$", yes.cells),
                   c("$\\text{Year} (t)$",     yes.cells),
                   "",
                   "\\textit{Number of Clusters:}",
                   c("Industry", per.fit(function(fit) length(unique(fit$clustervar$naics)))),
                   c("Year",     per.fit(function(fit) length(unique(fit$clustervar$year)))),
                   "",
                   c("$N$",                   per.fit(function(fit) fit$N)),
                   c("$R^2$ Full Model",      per.fit(function(fit) round(summary(fit)$r.squared,3))),
                   c("$R^2$ Projected Model", per.fit(function(fit) round(summary(fit)$P.r.squared,3))))
          }))

